diff --git a/.clang-format b/.clang-format
new file mode 100644
index 000000000..7cf221dc1
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,250 @@
+---
+Language:        Cpp
+# BasedOnStyle:  Google
+AccessModifierOffset: -1
+AlignAfterOpenBracket: Align
+AlignArrayOfStructures: None
+AlignConsecutiveAssignments:
+  Enabled:         true
+  AcrossEmptyLines: false
+  AcrossComments:  false
+  AlignCompound:   false
+  PadOperators:    true
+AlignConsecutiveBitFields:
+  Enabled:         false
+  AcrossEmptyLines: false
+  AcrossComments:  false
+  AlignCompound:   false
+  PadOperators:    false
+AlignConsecutiveDeclarations:
+  Enabled:         false
+  AcrossEmptyLines: false
+  AcrossComments:  false
+  AlignCompound:   false
+  PadOperators:    false
+AlignConsecutiveMacros:
+  Enabled:         true
+  AcrossEmptyLines: false
+  AcrossComments:  false
+  AlignCompound:   false
+  PadOperators:    false
+AlignEscapedNewlines: Left
+AlignOperands:   Align
+AlignTrailingComments: true
+AllowAllArgumentsOnNextLine: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortEnumsOnASingleLine: true
+AllowShortBlocksOnASingleLine: Never
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: All
+AllowShortLambdasOnASingleLine: All
+AllowShortIfStatementsOnASingleLine: WithoutElse
+AllowShortLoopsOnASingleLine: true
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: true
+AlwaysBreakTemplateDeclarations: Yes
+AttributeMacros:
+  - __capability
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+  AfterCaseLabel:  false
+  AfterClass:      false
+  AfterControlStatement: Never
+  AfterEnum:       false
+  AfterFunction:   false
+  AfterNamespace:  false
+  AfterObjCDeclaration: false
+  AfterStruct:     false
+  AfterUnion:      false
+  AfterExternBlock: false
+  BeforeCatch:     false
+  BeforeElse:      false
+  BeforeLambdaBody: false
+  BeforeWhile:     false
+  IndentBraces:    false
+  SplitEmptyFunction: true
+  SplitEmptyRecord: true
+  SplitEmptyNamespace: true
+BreakBeforeBinaryOperators: None
+BreakBeforeConceptDeclarations: Always
+BreakBeforeBraces: Linux
+BreakBeforeInheritanceComma: false
+BreakInheritanceList: BeforeColon
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+BreakConstructorInitializers: BeforeColon
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: true
+ColumnLimit:     120
+CommentPragmas:  '^ IWYU pragma:'
+QualifierAlignment: Leave
+CompactNamespaces: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DeriveLineEnding: true
+DerivePointerAlignment: true
+DisableFormat:   false
+EmptyLineAfterAccessModifier: Never
+EmptyLineBeforeAccessModifier: LogicalBlock
+ExperimentalAutoDetectBinPacking: false
+PackConstructorInitializers: NextLine
+BasedOnStyle:    ''
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+AllowAllConstructorInitializersOnNextLine: true
+FixNamespaceComments: true
+ForEachMacros:
+  - foreach
+  - Q_FOREACH
+  - BOOST_FOREACH
+IfMacros:
+  - KJ_IF_MAYBE
+IncludeBlocks:   Regroup
+IncludeCategories:
+  - Regex:           '^<ext/.*\.h>'
+    Priority:        2
+    SortPriority:    0
+    CaseSensitive:   false
+  - Regex:           '^<.*\.h>'
+    Priority:        1
+    SortPriority:    0
+    CaseSensitive:   false
+  - Regex:           '^<.*'
+    Priority:        2
+    SortPriority:    0
+    CaseSensitive:   false
+  - Regex:           '.*'
+    Priority:        3
+    SortPriority:    0
+    CaseSensitive:   false
+IncludeIsMainRegex: '([-_](test|unittest))?$'
+IncludeIsMainSourceRegex: ''
+IndentAccessModifiers: false
+IndentCaseLabels: true
+IndentCaseBlocks: false
+IndentGotoLabels: true
+IndentPPDirectives: BeforeHash
+IndentExternBlock: AfterExternBlock
+IndentRequiresClause: true
+IndentWidth:     2
+IndentWrappedFunctionNames: false
+InsertBraces:    false
+InsertTrailingCommas: None
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: false
+LambdaBodyIndentation: Signature
+MacroBlockBegin: ''
+MacroBlockEnd:   ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBinPackProtocolList: Never
+ObjCBlockIndentWidth: 2
+ObjCBreakBeforeNestedBlockParam: true
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakAssignment: 2
+PenaltyBreakBeforeFirstCallParameter: 1
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakOpenParenthesis: 0
+PenaltyBreakString: 1000
+PenaltyBreakTemplateDeclaration: 10
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 200
+PenaltyIndentedWhitespace: 0
+PointerAlignment: Left
+PPIndentWidth:   -1
+RawStringFormats:
+  - Language:        Cpp
+    Delimiters:
+      - cc
+      - CC
+      - cpp
+      - Cpp
+      - CPP
+      - 'c++'
+      - 'C++'
+    CanonicalDelimiter: ''
+    BasedOnStyle:    google
+  - Language:        TextProto
+    Delimiters:
+      - pb
+      - PB
+      - proto
+      - PROTO
+    EnclosingFunctions:
+      - EqualsProto
+      - EquivToProto
+      - PARSE_PARTIAL_TEXT_PROTO
+      - PARSE_TEST_PROTO
+      - PARSE_TEXT_PROTO
+      - ParseTextOrDie
+      - ParseTextProtoOrDie
+      - ParseTestProto
+      - ParsePartialTestProto
+    CanonicalDelimiter: pb
+    BasedOnStyle:    google
+ReferenceAlignment: Pointer
+ReflowComments:  true
+RemoveBracesLLVM: false
+RequiresClausePosition: OwnLine
+SeparateDefinitionBlocks: Leave
+ShortNamespaceLines: 1
+SortIncludes:    CaseSensitive
+SortJavaStaticImport: Before
+SortUsingDeclarations: true
+SpaceAfterCStyleCast: false
+SpaceAfterLogicalNot: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCaseColon: false
+SpaceBeforeCpp11BracedList: false
+SpaceBeforeCtorInitializerColon: true
+SpaceBeforeInheritanceColon: true
+SpaceBeforeParens: ControlStatements
+SpaceBeforeParensOptions:
+  AfterControlStatements: true
+  AfterForeachMacros: true
+  AfterFunctionDefinitionName: false
+  AfterFunctionDeclarationName: false
+  AfterIfMacros:   true
+  AfterOverloadedOperator: false
+  AfterRequiresInClause: false
+  AfterRequiresInExpression: false
+  BeforeNonEmptyParentheses: false
+SpaceAroundPointerQualifiers: Default
+SpaceBeforeRangeBasedForLoopColon: true
+SpaceInEmptyBlock: false
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 2
+SpacesInAngles:  Never
+SpacesInConditionalStatement: false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInLineCommentPrefix:
+  Minimum:         1
+  Maximum:         -1
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+SpaceBeforeSquareBrackets: false
+BitFieldColonSpacing: Both
+Standard:        c++17
+StatementAttributeLikeMacros:
+  - Q_EMIT
+StatementMacros:
+  - Q_UNUSED
+  - QT_REQUIRE_VERSION
+TabWidth:        8
+UseCRLF:         false
+UseTab:          Never
+WhitespaceSensitiveMacros:
+  - STRINGIZE
+  - PP_STRINGIZE
+  - BOOST_PP_STRINGIZE
+  - NS_SWIFT_NAME
+  - CF_SWIFT_NAME
+...
+
diff --git a/.clang-tidy b/.clang-tidy
new file mode 100644
index 000000000..be331deed
--- /dev/null
+++ b/.clang-tidy
@@ -0,0 +1,189 @@
+# This settings file for clang-tidy chooses which checks to run, the settings
+# for those checks, etc. It uses as many of the default values as possible and
+# runs all checks with some exclusions by default.
+#
+# The full list of clang-tidy 17 checks and documentation can be found
+# [here](https://releases.llvm.org/17.0.1/tools/clang/tools/extra/docs/clang-tidy/index.html)
+#
+# The "Checks" command should have 5 sections seperated by a newline:
+# 1. Turn on all checks by default. Done with "*"
+# 2. Turn off the catagories of checks we don't want
+# 3. Turn off specific, individual checks we don't want
+# 4. Turn on checks that we do want from from the catagories of checks that we
+#    didn't want
+# 5. Turn off the checks that we do want but that aren't passing yet
+---
+Checks: "*,
+
+        -abseil-*,
+        -altera-*,
+        -android-*,
+        -boost-*,
+        -darwin-*,
+        -fuchsia-*,
+        -linuxkernel-*,
+        -llvmlibc-*,
+        -*objc*,
+        -*osx*,
+        -zircon-*,
+
+        -bugprone-easily-swappable-parameters,
+        -modernize-use-trailing-return-type,
+        -readability-avoid-const-params-in-decls,
+        -readability-static-accessed-through-instance,
+        -misc-unused-parameters,
+        -hicpp-multiway-paths-covered,
+        -cert-err58-cpp,
+        -cert-dcl37-c,
+        -cert-dcl51-cpp,
+        -cppcoreguidelines-pro-bounds-constant-array-index,
+        -google-readability-braces-around-statements,
+        -hicpp-braces-around-statements,
+
+        google-readability-avoid-underscore-in-googletest-name,
+        google-upgrade-googletest-case,
+
+        -bugprone-empty-catch,
+        -bugprone-implicit-widening-of-multiplication-result,
+        -bugprone-narrowing-conversions,
+        -bugprone-switch-missing-default-case,
+        -cert-env33-c,
+        -cert-err33-c,
+        -cert-err34-c,
+        -cert-msc32-c,
+        -cert-msc51-cpp,
+        -clang-analyzer-core.CallAndMessage,
+        -clang-analyzer-core.NullDereference,
+        -clang-analyzer-core.UndefinedBinaryOperatorResult,
+        -clang-analyzer-core.uninitialized.ArraySubscript,
+        -clang-analyzer-core.uninitialized.UndefReturn,
+        -clang-analyzer-deadcode.DeadStores,
+        -clang-analyzer-optin.performance.Padding,
+        -clang-analyzer-security.insecureAPI.strcpy,
+        -clang-diagnostic-format,
+        -clang-diagnostic-logical-not-parentheses,
+        -clang-diagnostic-macro-redefined,
+        -clang-diagnostic-unknown-cuda-version,
+        -clang-diagnostic-unused-command-line-argument,
+        -clang-diagnostic-unused-result,
+        -concurrency-mt-unsafe,
+        -cppcoreguidelines-avoid-c-arrays,
+        -cppcoreguidelines-avoid-const-or-ref-data-members,
+        -cppcoreguidelines-avoid-do-while,
+        -cppcoreguidelines-avoid-magic-numbers,
+        -cppcoreguidelines-avoid-non-const-global-variables,
+        -cppcoreguidelines-explicit-virtual-functions,
+        -cppcoreguidelines-init-variables,
+        -cppcoreguidelines-macro-usage,
+        -cppcoreguidelines-narrowing-conversions,
+        -cppcoreguidelines-no-malloc,
+        -cppcoreguidelines-non-private-member-variables-in-classes,
+        -cppcoreguidelines-owning-memory,
+        -cppcoreguidelines-prefer-member-initializer,
+        -cppcoreguidelines-pro-bounds-array-to-pointer-decay,
+        -cppcoreguidelines-pro-bounds-pointer-arithmetic,
+        -cppcoreguidelines-pro-type-cstyle-cast,
+        -cppcoreguidelines-pro-type-member-init,
+        -cppcoreguidelines-pro-type-reinterpret-cast,
+        -cppcoreguidelines-pro-type-vararg,
+        -cppcoreguidelines-special-member-functions,
+        -cppcoreguidelines-use-default-member-init,
+        -cppcoreguidelines-virtual-class-destructor,
+        -google-explicit-constructor,
+        -google-global-names-in-headers,
+        -google-readability-casting,
+        -google-readability-namespace-comments,
+        -google-readability-todo,
+        -google-runtime-int,
+        -hicpp-avoid-c-arrays,
+        -hicpp-deprecated-headers,
+        -hicpp-explicit-conversions,
+        -hicpp-member-init,
+        -hicpp-no-array-decay,
+        -hicpp-no-malloc,
+        -hicpp-special-member-functions,
+        -hicpp-use-auto,
+        -hicpp-use-equals-default,
+        -hicpp-use-noexcept,
+        -hicpp-use-nullptr,
+        -hicpp-use-override,
+        -hicpp-vararg,
+        -llvm-else-after-return,
+        -llvm-header-guard,
+        -llvm-include-order,
+        -llvm-namespace-comment,
+        -misc-const-correctness,
+        -misc-header-include-cycle,
+        -misc-include-cleaner,
+        -misc-non-private-member-variables-in-classes,
+        -misc-use-anonymous-namespace,
+        -modernize-avoid-c-arrays,
+        -modernize-deprecated-headers,
+        -modernize-macro-to-enum,
+        -modernize-redundant-void-arg,
+        -modernize-type-traits,
+        -modernize-type-traits,
+        -modernize-use-auto,
+        -modernize-use-default-member-init,
+        -modernize-use-equals-default,
+        -modernize-use-nodiscard,
+        -modernize-use-noexcept,
+        -modernize-use-nullptr,
+        -modernize-use-override,
+        -modernize-use-using,
+        -openmp-use-default-none,
+        -performance-avoid-endl,
+        -performance-unnecessary-value-param,
+        -readability-container-size-empty,
+        -readability-convert-member-functions-to-static,
+        -readability-delete-null-pointer,
+        -readability-duplicate-include,
+        -readability-else-after-return,
+        -readability-function-cognitive-complexity,
+        -readability-identifier-length,
+        -readability-implicit-bool-conversion,
+        -readability-inconsistent-declaration-parameter-name,
+        -readability-isolate-declaration,
+        -readability-magic-numbers,
+        -readability-make-member-function-const,
+        -readability-non-const-parameter,
+        -readability-redundant-control-flow,
+        -readability-redundant-preprocessor,
+        -readability-suspicious-call-argument"
+WarningsAsErrors: ''
+HeaderFilterRegex: '.*'
+FormatStyle:     'file'
+UseColor: false
+CheckOptions:
+  readability-braces-around-statements.ShortStatementLines: 1
+  # readability-identifier-naming allowed casing types
+  # - lower_case
+  # - UPPER_CASE
+  # - camelBack
+  # - CamelCase
+  # - camel_Snake_Back
+  # - Camel_Snake_Case
+  # - aNy_CasE
+
+  # readability-identifier-naming.VariableCase: 'lower_case'
+  # readability-identifier-naming.FunctionCase: 'Camel_Snake_Case'
+  readability-identifier-naming.NamespaceCase: 'lower_case'
+  # readability-identifier-naming.MacroDefinitionCase: 'UPPER_CASE'
+  # readability-identifier-naming.TypedefCase: 'CamelCase'
+  # readability-identifier-naming.TypeAliasCase: 'CamelCase'
+  readability-identifier-naming.EnumCase: 'CamelCase'
+  # readability-identifier-naming.ConstantCase: 'lower_case'
+
+  # readability-identifier-naming.ConstantPrefix: 'k_'
+  # readability-identifier-naming.GlobalVariablePrefix: 'g_'
+
+  readability-identifier-naming.ClassCase: 'CamelCase'
+  # readability-identifier-naming.MemberCase: 'lower_case' # This entry might not be needed
+  # readability-identifier-naming.MethodCase: 'CamelCase' # This entry might not be needed
+  # readability-identifier-naming.PrivateMemberSuffix: '_'
+  # readability-identifier-naming.PrivateMethodSuffix: '_'
+
+  hicpp-signed-bitwise.IgnorePositiveIntegerLiterals: 'true'
+
+  bugprone-reserved-identifier.AllowedIdentifiers: '__cudaSafeCall;__cudaCheckError;__shfl_down;__CHOLLA_PRETTY_FUNC__'
+...
diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
new file mode 100644
index 000000000..51e66225d
--- /dev/null
+++ b/.git-blame-ignore-revs
@@ -0,0 +1,19 @@
+# To tell git blame to ignore these commits run this command in the repo.
+# `git config blame.ignoreRevsFile .git-blame-ignore-revs`
+# Requires git v2.23 or greater. Each entry must include the full 40 character
+# hash
+
+# Strip all trailing whitespace
+40fcc44334cc92572beb726961e23beb6be8ae2f
+
+# Source Files Reorganization
+50ce61188d43f778e5a31a28b95bbc7312a5bbfb
+b78d8c96680c9c2d5a5d41656895cb3795e1e204
+
+# Reformat Code with clang-format
+729ef8ed307eaa2cf42baa1f5af6c389ad614ac4
+fcaa4714241ad764d9ae38159cac5618e59178c8
+
+# Reformat Code with clang-format increasing column width to 120
+b779b212b24ed19592ac309eab1c3ccb7ba66212
+8e5b4619734e0922d815f4d259323c68002af6db
diff --git a/.github/workflows/build_tests.yml b/.github/workflows/build_and_lint.yml
similarity index 61%
rename from .github/workflows/build_tests.yml
rename to .github/workflows/build_and_lint.yml
index 19bdabb44..a4308ebd3 100644
--- a/.github/workflows/build_tests.yml
+++ b/.github/workflows/build_and_lint.yml
@@ -1,4 +1,7 @@
-name: Cholla Compile
+name: Build & Lint
+
+# This runs the HIP Builds. CUDA builds can be reenabled by adding the CUDA
+# container to the matrix and uncommenting the CUDA lines
 
 on:
   pull_request:
@@ -9,15 +12,15 @@ on:
 jobs:
   Build:
     name: >
-      Build
+      Build & Lint:
       ${{ matrix.container.name }}
       TYPE=${{ matrix.make-type }}
     # if: ${{ false }}  # If uncommented this line will disable this job
 
     # Choose OS/Runner
     runs-on: ubuntu-latest
-    container: 
-      image: ${{matrix.container.link}} 
+    container:
+      image: ${{matrix.container.link}}
     defaults:
       run:
         shell: bash
@@ -25,16 +28,13 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        make-type: [hydro, gravity, disk, particles, cosmology, mhd]
-        container: [{name: "CUDA", link: "docker://alwinm/cholla:cuda_github"}, {name: "HIP",link: "docker://alwinm/cholla:hip_github"},]
+        make-type: [hydro, gravity, disk, particles, cosmology, mhd, dust, cooling]
+        # The CUDA container can be added with {name: "CUDA", link: "docker://chollahydro/cholla:cuda_github"}
+        container: [{name: "HIP",link: "docker://chollahydro/cholla:rocm_github"}]
 
     # Setup environment variables
     env:
-      CHOLLA_MACHINE: github
       CHOLLA_MAKE_TYPE: ${{ matrix.make-type }}
-      CUDA_ROOT: /usr/local/cuda
-      HDF5_ROOT: /usr/lib/x86_64-linux-gnu/hdf5/serial
-      MPI_ROOT: /usr/lib/x86_64-linux-gnu/openmpi
 
     # Run the job itself
     steps:
@@ -53,22 +53,22 @@ jobs:
         git --version
         git config --global --add safe.directory /__w/cholla/cholla
         git config --global --add safe.directory '*'
-    - name: Show CUDA and gcc version
-      if: matrix.container.name == 'CUDA'
-      run: |
-        cc --version
-        c++ --version
-        nvcc -V
+    # - name: Show CUDA and gcc version
+    #   if: matrix.container.name == 'CUDA'
+    #   run: |
+    #     cc --version
+    #     c++ --version
+    #     nvcc -V
     - name: Show HIP and hipcc version
       if: matrix.container.name == 'HIP'
       run: |
         hipcc --version
         hipconfig --full
-    
 
     # Perform Build
     - name: Cholla setup
       run: |
+        make clobber
         source builds/run_tests.sh
         setupTests -c gcc
         echo "CHOLLA_ROOT           = ${CHOLLA_ROOT}"
@@ -77,11 +77,6 @@ jobs:
         echo "CHOLLA_LAUNCH_COMMAND=${CHOLLA_LAUNCH_COMMAND}" >> $GITHUB_ENV
         echo "F_OFFLOAD=${F_OFFLOAD}                          >> $GITHUB_ENV
         echo "CHOLLA_ENVSET=${CHOLLA_ENVSET}                  >> $GITHUB_ENV
-    - name: Build GoogleTest
-      run: |
-        source builds/run_tests.sh
-        buildGoogleTest
-        echo "GOOGLETEST_ROOT=${GOOGLETEST_ROOT}" >> $GITHUB_ENV
     - name: Build Cholla
       run: |
         source builds/run_tests.sh
@@ -90,3 +85,17 @@ jobs:
       run: |
         source builds/run_tests.sh
         buildChollaTests
+
+    # Run Clang-tidy
+    # - name: Run clang-tidy
+    #   if: matrix.container.name == 'CUDA'
+    #   run: make tidy TYPE=${{ matrix.make-type }} CLANG_TIDY_ARGS="--warnings-as-errors=*"
+    # - name: Display tidy_results_cpp.log
+    #   if: ${{ (matrix.container.name == 'CUDA') && (always()) }}
+    #   run: cat tidy_results_cpp.log
+    # - name: Display tidy_results_c.log
+    #   if: ${{ (matrix.container.name == 'CUDA') && (always()) }}
+    #   run: cat tidy_results_c.log
+    # - name: Display tidy_results_gpu.log
+    #   if: ${{ (matrix.container.name == 'CUDA') && (always()) }}
+    #   run: cat tidy_results_gpu.log
diff --git a/.github/workflows/code_formatting.yml b/.github/workflows/code_formatting.yml
new file mode 100644
index 000000000..6176efac3
--- /dev/null
+++ b/.github/workflows/code_formatting.yml
@@ -0,0 +1,28 @@
+name: Code Formatting
+
+on: [pull_request, push]
+
+jobs:
+  cpp-format:
+    runs-on: ubuntu-latest
+
+    # Setup environment variables
+    env:
+      CLANG_FORMAT_VERSION: 17
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: Install clang-format
+        run: |
+          wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add -
+          sudo add-apt-repository "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-${{ env.CLANG_FORMAT_VERSION }} main"
+          sudo apt install clang-format-${{ env.CLANG_FORMAT_VERSION }}
+          sudo ln --symbolic --force /usr/bin/clang-format-${{ env.CLANG_FORMAT_VERSION }} /usr/bin/clang-format
+      - name: Verify clang-format installation
+        run: |
+          clang-format-${{ env.CLANG_FORMAT_VERSION }} --version
+          which clang-format-${{ env.CLANG_FORMAT_VERSION }}
+          clang-format --version
+          which clang-format
+      - name: Check if files are properly formatted
+        run: tools/clang-format_runner.sh --dry-run --Werror
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 936f8ebbb..864a8ab2c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,8 @@
 # Files specific to this repo #
 ##############################
-cholla*
 googletest*
+tidy_results*.txt
+bin/*
 
 # Compiled source #
 ###################
@@ -15,6 +16,7 @@ googletest*
 *.a
 a.out
 *.dSYM
+__pycache__
 
 # Makefiles #
 #############
@@ -24,7 +26,7 @@ makefile.summit
 *.o
 
 ##executable
-cholla*
+cholla.*
 
 ## input files
 #parameter_file.txt
@@ -38,10 +40,9 @@ data
 out.*
 o.*
 run
-
-
 disk.*
-
+*.gcno
+*.gcda
 
 # Logs and databases #
 ######################
@@ -66,7 +67,6 @@ disk.*
 # OS generated files #
 ######################
 .DS_Store
-
 .remote-sync.json
 .remote-sync_macos.json
 ._*
@@ -84,4 +84,3 @@ Thumbs.db
 #############################
 docs/doxygen/build
 docs/sphinx/build
-
diff --git a/Jenkinsfile b/Jenkinsfile
new file mode 100644
index 000000000..8928df96c
--- /dev/null
+++ b/Jenkinsfile
@@ -0,0 +1,121 @@
+pipeline
+{
+    agent none
+
+    environment
+    {
+        CHOLLA_ROOT           = "${env.WORKSPACE}"
+        CHOLLA_MACHINE        = 'crc'
+        CHOLLA_LAUNCH_COMMAND = 'mpirun -np'
+    }
+
+    stages
+    {
+        stage('BuildAndTest')
+        {
+            matrix
+            {
+                agent
+                {
+                    label
+                    {
+                        label 'eschneider-ppc-n4'
+                        customWorkspace "${env.JOB_NAME}/${env.CHOLLA_MAKE_TYPE}"
+                    }
+                }
+
+                axes
+                {
+                    axis
+                    {
+                        name 'CHOLLA_MAKE_TYPE'
+                        values 'hydro', 'gravity', 'disk', 'particles', 'cosmology', 'mhd', 'dust', 'cooling'
+                    }
+                }
+
+                stages
+                {
+                    stage('Clone Repo Cholla')
+                    {
+                        steps
+                        {
+                            sh  '''
+                                git submodule update --init --recursive
+                                make clobber
+                                '''
+                        }
+                    }
+                    stage('Build Cholla')
+                    {
+                        steps
+                        {
+                            sh  '''
+                                source builds/run_tests.sh
+                                setupTests -c gcc -t ${CHOLLA_MAKE_TYPE}
+
+                                buildCholla OPTIMIZE
+                                '''
+                        }
+                    }
+                    stage('Build Tests')
+                    {
+                        steps
+                        {
+                            sh  '''
+                                source builds/run_tests.sh
+                                setupTests -c gcc -t ${CHOLLA_MAKE_TYPE}
+
+                                buildChollaTests
+                                '''
+                        }
+                    }
+                    stage('Run Tests')
+                    {
+                        steps
+                        {
+                            sh  '''
+                                source builds/run_tests.sh
+                                setupTests -c gcc -t ${CHOLLA_MAKE_TYPE}
+
+                                runTests
+                                '''
+                        }
+                    }
+                    stage('Run Clang Tidy')
+                    {
+                        steps
+                        {
+                            catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
+                                sh  '''
+                                    source builds/run_tests.sh
+                                    setupTests -c gcc -t ${CHOLLA_MAKE_TYPE}
+
+                                    module load clang/17.0.1
+                                    make tidy CLANG_TIDY_ARGS="--warnings-as-errors=*" TYPE=${CHOLLA_MAKE_TYPE}
+                                    '''
+                            }
+                        }
+                    }
+                    stage('Show Tidy Results')
+                    {
+                        steps
+                        {
+                            // Print the clang-tidy results with bars of equal
+                            // signs seperating each file
+                            sh  '''
+                                printf '=%.0s' {1..100}
+                                printf "\n"
+                                cat tidy_results_cpp_${CHOLLA_MAKE_TYPE}.log
+                                printf '=%.0s' {1..100}
+                                printf "\n"
+                                cat tidy_results_gpu_${CHOLLA_MAKE_TYPE}.log
+                                printf '=%.0s' {1..100}
+                                printf "\n"
+                                '''
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/Makefile b/Makefile
index b4975b1ea..c444ae4a8 100644
--- a/Makefile
+++ b/Makefile
@@ -1,3 +1,4 @@
+SHELL = /usr/bin/env bash
 #-- Set default include makefile
 MACHINE ?= $(shell builds/machine.sh)
 TYPE    ?= hydro
@@ -10,67 +11,78 @@ CUDA_ARCH ?= sm_70
 
 DIRS     := src src/analysis src/chemistry_gpu src/cooling src/cooling_grackle src/cosmology \
             src/cpu src/global src/gravity src/gravity/paris src/grid src/hydro \
-            src/integrators src/io src/main.cpp src/main_tests.cpp \
+            src/integrators src/io src/main.cpp src/main_tests.cpp src/mhd\
             src/model src/mpi src/old_cholla src/particles src/reconstruction \
-            src/riemann_solvers src/system_tests src/utils
+            src/riemann_solvers src/system_tests src/utils src/dust
 
 SUFFIX ?= .$(TYPE).$(MACHINE)
 
-CFILES   := $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.c))
 CPPFILES := $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.cpp))
 GPUFILES := $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.cu))
 
 # Build a list of all potential object files so cleaning works properly
-CLEAN_OBJS := $(subst .c,.o,$(CFILES)) \
-              $(subst .cpp,.o,$(CPPFILES)) \
+CLEAN_OBJS := $(subst .cpp,.o,$(CPPFILES)) \
               $(subst .cu,.o,$(GPUFILES))
 
-# Set testing related lists and variables
+# Check if it should include testing flags
 ifeq ($(TEST), true)
-  # This is a test build so lets clear out Cholla's main file and set
-  # appropriate compiler flags, suffix, etc
+  ADD_TEST_FLAGS = yes
   $(info Building Tests...)
   $(info )
-  SUFFIX    := $(strip $(SUFFIX)).tests
   CPPFILES  := $(filter-out src/main.cpp,$(CPPFILES))
-  LIBS      += -L$(GOOGLETEST_ROOT)/lib64 -pthread -lgtest -lhdf5_cpp
-  TEST_FLAGS = -I$(GOOGLETEST_ROOT)/include
-  CFLAGS   = $(TEST_FLAGS)
-  CXXFLAGS = $(TEST_FLAGS)
-  GPUFLAGS = $(TEST_FLAGS)
-
+  # HACK
   # Set the build flags to debug. This is mostly to avoid the approximations
   # made by Ofast which break std::isnan and std::isinf which are required for
   # the testing
   BUILD = DEBUG
+endif
+ifeq ($(MAKECMDGOALS), tidy)
+	ADD_TEST_FLAGS = yes
+endif
+
+# Set testing related lists and variables
+ifeq ($(ADD_TEST_FLAGS), yes)
+  # This is a test build so lets clear out Cholla's main file and set
+  # appropriate compiler flags, suffix, etc
+  SUFFIX    := $(strip $(SUFFIX)).tests
+  LIBS      += -L$(GOOGLETEST_ROOT)/lib64 -pthread -lgtest -lhdf5_cpp
+  TEST_FLAGS = -I$(GOOGLETEST_ROOT)/include
+  CXXFLAGS += $(TEST_FLAGS)
+  GPUFLAGS += $(TEST_FLAGS)
 else
   # This isn't a test build so clear out testing related files
-  CFILES   := $(filter-out src/system_tests/% %_tests.c,$(CFILES))
   CPPFILES := $(filter-out src/system_tests/% %_tests.cpp,$(CPPFILES))
   CPPFILES := $(filter-out src/utils/testing_utilities.cpp,$(CPPFILES))
   GPUFILES := $(filter-out src/system_tests/% %_tests.cu,$(GPUFILES))
 endif
 
-OBJS     := $(subst .c,.o,$(CFILES)) \
-            $(subst .cpp,.o,$(CPPFILES)) \
+ifeq ($(COVERAGE), true)
+  CXXFLAGS += --coverage
+endif
+
+OBJS     := $(subst .cpp,.o,$(CPPFILES)) \
             $(subst .cu,.o,$(GPUFILES))
 
 #-- Set default compilers and flags
-CC                ?= cc
 CXX               ?= CC
 
-CFLAGS_OPTIMIZE   ?= -g -Ofast
-CXXFLAGS_OPTIMIZE ?= -g -Ofast -std=c++14
-GPUFLAGS_OPTIMIZE ?= -g -O3 -std=c++14
+CXXFLAGS_OPTIMIZE ?= -g -Ofast -std=c++17
+GPUFLAGS_OPTIMIZE ?= -g -O3 -std=c++17
+
+CXXFLAGS_DEBUG    ?= -g -O0 -std=c++17
+ifdef HIPCONFIG
+  GPUFLAGS_DEBUG    ?= -g -O0 -std=c++17
+else
+  GPUFLAGS_DEBUG    ?= -g -G -cudart shared -O0 -std=c++17 -ccbin=mpicxx
+endif
+
 BUILD             ?= OPTIMIZE
 
-CFLAGS            += $(CFLAGS_$(BUILD))
 CXXFLAGS          += $(CXXFLAGS_$(BUILD))
 GPUFLAGS          += $(GPUFLAGS_$(BUILD))
 
 #-- Add flags and libraries as needed
 
-CFLAGS   += $(DFLAGS) -Isrc
 CXXFLAGS += $(DFLAGS) -Isrc
 GPUFLAGS += $(DFLAGS) -Isrc
 
@@ -89,6 +101,13 @@ ifeq ($(findstring -DPARIS,$(DFLAGS)),-DPARIS)
   endif
 endif
 
+ifeq ($(findstring -DSUPERNOVA,$(DFLAGS)),-DSUPERNOVA)
+    ifdef HIPCONFIG
+	CXXFLAGS += -I$(ROCM_PATH)/include/hiprand -I$(ROCM_PATH)/hiprand/include
+	GPUFLAGS += -I$(ROCM_PATH)/include/hiprand -I$(ROCM_PATH)/hiprand/include
+    endif
+endif
+
 ifeq ($(findstring -DHDF5,$(DFLAGS)),-DHDF5)
   CXXFLAGS += -I$(HDF5_ROOT)/include
   GPUFLAGS += -I$(HDF5_ROOT)/include
@@ -117,6 +136,7 @@ ifdef HIPCONFIG
   DFLAGS    += -DO_HIP
   CXXFLAGS  += $(HIPCONFIG)
   GPUCXX    ?= hipcc
+  #GPUFLAGS  += -Wall
   LD        := $(CXX)
   LDFLAGS   := $(CXXFLAGS) -L$(ROCM_PATH)/lib
   LIBS      += -lamdhip64
@@ -144,7 +164,7 @@ ifeq ($(findstring -DCHEMISTRY_GPU,$(DFLAGS)),-DCHEMISTRY_GPU)
   DFLAGS += -DSCALAR
 endif
 
-.SUFFIXES: .c .cpp .cu .o
+.SUFFIXES: .cpp .cu .o
 
 EXEC := bin/cholla$(SUFFIX)
 
@@ -154,28 +174,56 @@ DFLAGS      += -DGIT_HASH='"$(shell git rev-parse --verify HEAD)"'
 MACRO_FLAGS := -DMACRO_FLAGS='"$(DFLAGS)"'
 DFLAGS      += $(MACRO_FLAGS)
 
+# Setup variables for clang-tidy
+LIBS_CLANG_TIDY     := $(subst -I/, -isystem /,$(LIBS))
+# This tells clang-tidy that the path after each -isystem command is a system library so that it can be easily ignored by the header filter regex
+LIBS_CLANG_TIDY     += -isystem $(MPI_ROOT)/include -isystem $(HDF5_ROOT)/include
+CXXFLAGS_CLANG_TIDY := $(subst -I/, -isystem /,$(LDFLAGS))
+GPUFLAGS_CLANG_TIDY := $(subst -I/, -isystem /,$(GPUFLAGS))
+GPUFLAGS_CLANG_TIDY := $(filter-out -ccbin=mpicxx -fmad=false --expt-extended-lambda,$(GPUFLAGS_CLANG_TIDY))
+GPUFLAGS_CLANG_TIDY += --cuda-host-only --cuda-path=$(CUDA_ROOT) -isystem /clang/includes
+CPPFILES_TIDY := $(CPPFILES)
+GPUFILES_TIDY := $(GPUFILES)
+
+ifdef TIDY_FILES
+  CPPFILES_TIDY := $(filter $(TIDY_FILES), $(CPPFILES_TIDY))
+  GPUFILES_TIDY := $(filter $(TIDY_FILES), $(GPUFILES_TIDY))
+endif
+
 $(EXEC): prereq-build $(OBJS)
 	mkdir -p bin/ && $(LD) $(LDFLAGS) $(OBJS) -o $(EXEC) $(LIBS)
 	eval $(EXTRA_COMMANDS)
 
-%.o: %.c
-	$(CC) $(CFLAGS) -c $< -o $@
-
 %.o: %.cpp
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
 %.o: %.cu
 	$(GPUCXX) $(GPUFLAGS) -c $< -o $@
 
-.PHONY: clean
+.PHONY: clean, clobber, tidy, format
+
+format:
+	tools/clang-format_runner.sh
+
+tidy:
+# Flags we might want
+# - --warnings-as-errors=<string> Upgrade all warnings to error, good for CI
+	clang-tidy --verify-config
+	@echo -e
+	(time clang-tidy $(CLANG_TIDY_ARGS) $(CPPFILES_TIDY) -- $(DFLAGS) $(CXXFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_cpp_$(TYPE).log 2>&1 & \
+	(time clang-tidy $(CLANG_TIDY_ARGS) $(GPUFILES_TIDY) -- $(DFLAGS) $(GPUFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_gpu_$(TYPE).log 2>&1 & \
+	for i in 1 2; do wait -n; done
+	@echo -e "\nResults from clang-tidy are available in the 'tidy_results_cpp_$(TYPE).log' and 'tidy_results_gpu_$(TYPE).log' files."
 
 clean:
 	rm -f $(CLEAN_OBJS)
 	rm -rf googletest
 	-find bin/ -type f -executable -name "cholla.*.$(MACHINE)*" -exec rm -f '{}' \;
+	-find src/ -type f -name "*.gcno" -delete
+	-find src/ -type f -name "*.gcda" -delete
 
 clobber: clean
-	find . -type f -executable -name "cholla*" -exec rm -f '{}' \;
+	-find bin/ -type f -executable -name "cholla*" -exec rm -f '{}' \;
 	-find bin/ -type d -name "t*" -prune -exec rm -rf '{}' \;
 	rm -rf bin/cholla.*tests*.xml
 
diff --git a/README.md b/README.md
index c9d731b89..a61ce0ad6 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
-![Compile](https://github.com/cholla-hydro/cholla/actions/workflows/build_tests.yml/badge.svg)
+![Build & Lint](https://github.com/cholla-hydro/cholla/actions/workflows/build_and_lint.yml/badge.svg)
+![Code Formatting](https://github.com/cholla-hydro/cholla/actions/workflows/code_formatting.yml/badge.svg)
 
 CHOLLA
 ============
diff --git a/builds/make.host.c3po b/builds/make.host.c3po
index 588a14861..dc5e3d8eb 100644
--- a/builds/make.host.c3po
+++ b/builds/make.host.c3po
@@ -1,12 +1,9 @@
 #-- Compiler and flags for different build type
-CC                = mpicc
 CXX               = mpicxx
-CFLAGS_DEBUG      = -g -O0
-CFLAGS_OPTIMIZE   = -g -O2
-CXXFLAGS_DEBUG    = -g -O0 -std=c++14 ${F_OFFLOAD}
-CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++14 ${F_OFFLOAD}
-GPUFLAGS_DEBUG    = -g -O0 -std=c++14 -ccbin=mpicxx
-GPUFLAGS_OPTIMIZE = -g -O3 -std=c++14 -ccbin=mpicxx
+CXXFLAGS_DEBUG    = -g -O0 -std=c++17 ${F_OFFLOAD}
+CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++17 ${F_OFFLOAD}
+GPUFLAGS_DEBUG    = -g -G -cudart shared -O0 -std=c++17 -ccbin=mpicxx -Xcompiler -rdynamic
+GPUFLAGS_OPTIMIZE = -g -O3 -std=c++17 -ccbin=mpicxx
 
 OMP_NUM_THREADS   = 7
 
diff --git a/builds/make.host.crc b/builds/make.host.crc
index 6378cc80e..e0c20e162 100644
--- a/builds/make.host.crc
+++ b/builds/make.host.crc
@@ -1,11 +1,9 @@
 
 #-- Compiler and flags for different build type
-CC                = mpicc
 CXX               = mpicxx
-CFLAGS_DEBUG      = -g -O0
-CFLAGS_OPTIMIZE   = -Ofast
-CXXFLAGS_DEBUG    = -g -O0 -std=c++14
-CXXFLAGS_OPTIMIZE = -Ofast -std=c++14
+CXXFLAGS_DEBUG    = -g -O0 -std=c++17
+CXXFLAGS_OPTIMIZE = -Ofast -std=c++17
+GPUFLAGS_OPTIMIZE = -g -O3 -std=c++17
 CUDA_ARCH       = sm_70
 OMP_NUM_THREADS   = 16
 
diff --git a/builds/make.host.frontier b/builds/make.host.frontier
index 14aae5d38..bae874c78 100644
--- a/builds/make.host.frontier
+++ b/builds/make.host.frontier
@@ -1,21 +1,18 @@
 #-- make.host for Frontier at the OLCF with
 #-- Compiler and flags for different build type
-CC                = cc
 CXX               = CC
 #GPUCXX           ?= CC -x hip
 GPUCXX           ?= hipcc
 
-CFLAGS_DEBUG      = -g -O0
-CFLAGS_OPTIMIZE   = -g -O2
+CXXFLAGS_DEBUG    = -g -O0 -std=c++17
+CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++17 -Wno-unused-result
 
-CXXFLAGS_DEBUG    = -g -O0 -std=c++14
-CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++14 -Wno-unused-result
-
-GPUFLAGS          = --offload-arch=gfx90a -Wno-unused-result
+GPUFLAGS_OPTIMIZE = -std=c++17 --offload-arch=gfx90a -Wall -Wno-unused-result
+GPUFLAGS_DEBUG    = -g -O0 -std=c++17 --offload-arch=gfx90a -Wall -Wno-unused-result
 HIPCONFIG	  = -I$(ROCM_PATH)/include $(shell hipconfig -C) # workaround for Rocm 5.2 warnings
 #HIPCONFIG	  = $(shell hipconfig -C)
 
-OMP_NUM_THREADS   = 8
+OMP_NUM_THREADS   = 7
 #-- How to launch job
 JOB_LAUNCH        = srun -u -A STF016 -n 1 -c 8
 
diff --git a/builds/make.host.github b/builds/make.host.github
index acc003aad..46da09349 100644
--- a/builds/make.host.github
+++ b/builds/make.host.github
@@ -1,18 +1,16 @@
 #-- Compiler and flags for different build type
-CC                = mpicc
 CXX               = mpicxx
-CFLAGS_DEBUG      = -g -O0
-CFLAGS_OPTIMIZE   = -g -O2
-CXXFLAGS_DEBUG    = -g -O0 -std=c++14 ${F_OFFLOAD}
-CXXFLAGS_OPTIMIZE = -Ofast -std=c++14 ${F_OFFLOAD}
-GPUFLAGS_DEBUG    = -std=c++14
-GPUFLAGS_OPTIMIZE = -std=c++14
+CXXFLAGS_DEBUG    = -g -O0 -std=c++17 ${F_OFFLOAD}
+CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 ${F_OFFLOAD}
+GPUFLAGS_DEBUG    = -g -G -cudart shared -O0 -std=c++17
+GPUFLAGS_OPTIMIZE = -g -O3 -std=c++17
 
 OMP_NUM_THREADS   = 7
 
 #-- Library
 ifdef HIPCONFIG
 	HIPCONFIG := -I$(shell hipconfig -R)/include $(shell hipconfig -C)
+	GPUFLAGS_DEBUG = -g -O0 -std=c++17
 endif
 CUDA_ROOT       := $(CUDA_ROOT)
 HDF5_ROOT       := $(HDF5_ROOT)
@@ -24,15 +22,8 @@ GOOGLETEST_ROOT := ${GOOGLETEST_ROOT}
 #-- MPI calls accept GPU buffers (requires GPU-aware MPI)
 # MPI_GPU = -DMPI_GPU
 
-ifndef HIPCONFIG
-	GPUFLAGS_DEBUG    += -g -O0 -ccbin=mpicxx
-	GPUFLAGS_OPTIMIZE += -g -O3 -ccbin=mpicxx
-endif
-
 ifdef HIPCONFIG
 	MPI_ROOT := ${MPI_ROOT}
-	CFLAGS_DEBUG      += -fPIE
-	CFLAGS_OPTIMIZE   += -fPIE
 	CXXFLAGS_DEBUG    += -fPIE
 	CXXFLAGS_OPTIMIZE += -fPIE
 	GPUFLAGS_DEBUG    += -fPIE
diff --git a/builds/make.host.lux b/builds/make.host.lux
index b8af559c1..edf4e42c0 100644
--- a/builds/make.host.lux
+++ b/builds/make.host.lux
@@ -1,14 +1,10 @@
 #-- make.inc for the Shamrock Server
 
 #-- Compiler and flags for different build type
-CC                = mpicc
 CXX               = mpicxx
-CFLAGS_DEBUG      = -g -O0
-CFLAGS_OPTIMIZE   = -g -O2
-CXXFLAGS_DEBUG    = -g -O0 -std=c++14
-CXXFLAGS_OPTIMIZE = -Ofast -std=c++14
-GPUFLAGS         = -std=c++14
-
+CXXFLAGS_DEBUG    = -g -O0 -std=c++17
+CXXFLAGS_OPTIMIZE = -Ofast -std=c++17
+GPUFLAGS         = -std=c++17
 
 OMP_NUM_THREADS = 10
 
diff --git a/builds/make.host.poplar b/builds/make.host.poplar
index 726ec788c..f029e09e6 100644
--- a/builds/make.host.poplar
+++ b/builds/make.host.poplar
@@ -1,12 +1,10 @@
 #-- make.inc for Poplar, COE cluster at HPE
 
 #-- Compiler and flags for different build type
-CC                = cc
 CXX               = CC
-CFLAGS_DEBUG      = -g -O0 ${F_OFFLOAD}
-CFLAGS_OPTIMIZE   = -Ofast ${F_OFFLOAD}
-CXXFLAGS_DEBUG    = -g -O0 -std=c++14 ${F_OFFLOAD}
-CXXFLAGS_OPTIMIZE = -Ofast -std=c++14 ${F_OFFLOAD}
+CXXFLAGS_DEBUG    = -g -O0 -std=c++17 ${F_OFFLOAD}
+CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 ${F_OFFLOAD}
+
 GPUFLAGS          = --offload-arch=gfx906,gfx908
 HIPCONFIG         = $(shell hipconfig -C)
 
diff --git a/builds/make.host.poplar.aomp b/builds/make.host.poplar.aomp
index 984e432c2..e87fe68e2 100644
--- a/builds/make.host.poplar.aomp
+++ b/builds/make.host.poplar.aomp
@@ -1,13 +1,11 @@
 #-- make.inc for Poplar, COE cluster at HPE
 
 #-- Compiler and flags for different build type
-CC                = mpicc
 CXX               = mpicxx
 HIPCONFIG         = $(shell hipconfig -C)
-CFLAGS_DEBUG      = -g -O0
-CFLAGS_OPTIMIZE   = -Ofast
-CXXFLAGS_DEBUG    = -g -O0 -std=c++14
-CXXFLAGS_OPTIMIZE = -Ofast -std=c++14
+CXXFLAGS_DEBUG    = -g -O0 -std=c++17
+CXXFLAGS_OPTIMIZE = -Ofast -std=c++17
+
 GPUFLAGS          = --offload-arch=gfx906,gfx908
 LIBS              = -lm -lstdc++
 
diff --git a/builds/make.host.poplar.cce+hip b/builds/make.host.poplar.cce+hip
index 49b2e6256..b83268e12 100644
--- a/builds/make.host.poplar.cce+hip
+++ b/builds/make.host.poplar.cce+hip
@@ -1,13 +1,11 @@
 #-- make.inc for Poplar, HPE COE cluster
 
 #-- Compiler and flags for different build type
-CC                = cc
 CXX               = CC
 HIPCONFIG         = $(shell hipconfig -C)
-CFLAGS_DEBUG      = -g -O0
-CFLAGS_OPTIMIZE   = -Ofast
-CXXFLAGS_DEBUG    = -g -O0 -std=c++14
-CXXFLAGS_OPTIMIZE = -Ofast -std=c++14
+CXXFLAGS_DEBUG    = -g -O0 -std=c++17
+CXXFLAGS_OPTIMIZE = -Ofast -std=c++17
+
 GPUFLAGS          = --offload-arch=gfx906,gfx908
 
 #-- How to launch job
diff --git a/builds/make.host.shamrock b/builds/make.host.shamrock
index fb6c63b94..eec8d48e6 100644
--- a/builds/make.host.shamrock
+++ b/builds/make.host.shamrock
@@ -1,14 +1,11 @@
 #-- make.inc for the Lux Cluster
 
 #-- Compiler and flags for different build type
-CC                = mpicc
 CXX               = mpicxx
 #CC                = gcc
 #CXX               = g++
-CFLAGS_DEBUG      = -g -O0
-CFLAGS_OPTIMIZE   = -g -O2
-CXXFLAGS_DEBUG    = -g -O0 -std=c++14
-CXXFLAGS_OPTIMIZE = -Ofast -std=c++14
+CXXFLAGS_DEBUG    = -g -O0 -std=c++17
+CXXFLAGS_OPTIMIZE = -Ofast -std=c++17
 
 
 OMP_NUM_THREADS = 10
diff --git a/builds/make.host.spock b/builds/make.host.spock
index a4b031788..9dfc41676 100644
--- a/builds/make.host.spock
+++ b/builds/make.host.spock
@@ -1,14 +1,10 @@
 #-- make.inc for Spock EAS at the OLCF with
 
 #-- Compiler and flags for different build type
-CC                = cc
 CXX               = CC
 
-CFLAGS_DEBUG      = -g -O0
-CFLAGS_OPTIMIZE   = -g -O2
-
-CXXFLAGS_DEBUG    = -g -O0 -std=c++14
-CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++14
+CXXFLAGS_DEBUG    = -g -O0 -std=c++17
+CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++17
 
 GPUFLAGS          = --offload-arch=gfx908
 HIPCONFIG	  = $(shell hipconfig -C)
diff --git a/builds/make.host.summit b/builds/make.host.summit
index 2d557be26..a9f5337f5 100644
--- a/builds/make.host.summit
+++ b/builds/make.host.summit
@@ -2,14 +2,11 @@
 #   https://www.olcf.ornl.gov/summit/
 
 #-- Compiler and flags for different build type
-CC                = mpicc
 CXX               = mpicxx
-CFLAGS_DEBUG      = -g -O0
-CFLAGS_OPTIMIZE   = -g -O2
-CXXFLAGS_DEBUG    = -g -O0 -std=c++14 ${F_OFFLOAD}
-CXXFLAGS_OPTIMIZE = -Ofast -std=c++14 ${F_OFFLOAD}
-GPUFLAGS_DEBUG    = -g -O0 -std=c++14 -ccbin=mpicxx
-GPUFLAGS_OPTIMIZE = -g -O3 -std=c++14 -ccbin=mpicxx
+CXXFLAGS_DEBUG    = -g -O0 -std=c++17 ${F_OFFLOAD}
+CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 ${F_OFFLOAD}
+GPUFLAGS_DEBUG    = -g -O0 -std=c++17 -ccbin=mpicxx -G -cudart shared
+GPUFLAGS_OPTIMIZE = -g -O3 -std=c++17 -ccbin=mpicxx
 
 OMP_NUM_THREADS   = 7
 
diff --git a/builds/make.host.tornado b/builds/make.host.tornado
index df938d8e7..cdcf5483f 100644
--- a/builds/make.host.tornado
+++ b/builds/make.host.tornado
@@ -1,12 +1,9 @@
 #-- make.inc for the Lux Cluster
 
 #-- Compiler and flags for different build type
-CC                = gcc
 CXX               = g++
-CFLAGS_DEBUG      = -g -O0
-CFLAGS_OPTIMIZE   = -g -O2
-CXXFLAGS_DEBUG    = -g -O0 -std=c++14
-CXXFLAGS_OPTIMIZE = -Ofast -std=c++14
+CXXFLAGS_DEBUG    = -g -O0 -std=c++17
+CXXFLAGS_OPTIMIZE = -Ofast -std=c++17
 
 
 OMP_NUM_THREADS = 10
diff --git a/builds/make.inc.template b/builds/make.inc.template
index 22fbd663d..abfa97d1e 100644
--- a/builds/make.inc.template
+++ b/builds/make.inc.template
@@ -1,13 +1,8 @@
 #POISSON_SOLVER ?= -DPFFT
 #DFLAGS += $(POISSON_SOLVER)
 
-
-#To use GPUs, CUDA must be turned on here
-#Optional error checking can also be enabled
-DFLAGS += -DCUDA #-DCUDA_ERROR_CHECK
-
 #To use MPI, DFLAGS must include -DMPI_CHOLLA
-DFLAGS += -DMPI_CHOLLA -DBLOCK
+DFLAGS += -DMPI_CHOLLA
 
 #Set the MPI Processes grid [nproc_x, nproc_y, nproc_z]
 #DFLAGS += -DSET_MPI_GRID
@@ -33,8 +28,8 @@ DFLAGS += -DHDF5
 # Reconstruction
 #DFLAGS += -DPCM
 #DFLAGS += -DPLMP
-#DFLAGS += -DPLMC
-DFLAGS += -DPPMP
+DFLAGS += -DPLMC
+#DFLAGS += -DPPMP
 #DFLAGS += -DPPMC
 
 # Riemann Solver
@@ -65,7 +60,7 @@ DFLAGS += -DTEMPERATURE_FLOOR
 #DFLAGS += -DDYNAMIC_GPU_ALLOC
 
 # Set the cooling function
-#DFLAGS += -DCOOLING_GPU 
+#DFLAGS += -DCOOLING_GPU
 #DFLAGS += -DCLOUDY_COOL
 
 # Use Tiled Iitial Conditions for Scaling Tets
diff --git a/builds/make.type.basic_scalar b/builds/make.type.basic_scalar
new file mode 100644
index 000000000..02706b223
--- /dev/null
+++ b/builds/make.type.basic_scalar
@@ -0,0 +1,33 @@
+#-- Default hydro build with BASIC_SCALAR
+
+DFLAGS    += -DMPI_CHOLLA
+DFLAGS    += -DPRECISION=2
+DFLAGS    += -DPLMC
+DFLAGS    += -DHLLC
+
+# Integrator
+# DFLAGS    += -DSIMPLE
+DFLAGS    += -DVL
+
+# Apply a density and temperature floor
+DFLAGS    += -DDENSITY_FLOOR
+DFLAGS    += -DTEMPERATURE_FLOOR
+
+# Toggle scalar fields in general
+DFLAGS += -DSCALAR
+# Toggle Basic scalar field
+DFLAGS += -DBASIC_SCALAR
+
+# Solve the Gas Internal Energy usisng a Dual Energy Formalism
+#DFLAGS    += -DDE
+
+# Apply cooling on the GPU from precomputed tables
+#DFLAGS    += -DCOOLING_GPU
+
+# Measure the Timing of the different stages
+#DFLAGS    += -DCPU_TIME
+
+# Select output format
+# Can also add -DSLICES and -DPROJECTIONS
+OUTPUT    ?=  -DOUTPUT -DHDF5
+DFLAGS    += $(OUTPUT)
diff --git a/builds/make.type.cloudy b/builds/make.type.cloudy
index e604ff818..10fa51d60 100644
--- a/builds/make.type.cloudy
+++ b/builds/make.type.cloudy
@@ -6,9 +6,7 @@ OUTPUT    ?=  -DOUTPUT -DHDF5
 
 MPI_GPU   ?=
 
-DFLAGS    += -DCUDA
 DFLAGS    += -DMPI_CHOLLA
-DFLAGS    += -DBLOCK
 DFLAGS    += -DPRECISION=2
 DFLAGS    += -DPPMP
 DFLAGS    += -DHLLC
diff --git a/builds/make.type.cooling b/builds/make.type.cooling
index baf4ed0e9..0b96722a0 100644
--- a/builds/make.type.cooling
+++ b/builds/make.type.cooling
@@ -6,9 +6,7 @@ OUTPUT    ?=  -DOUTPUT -DHDF5
 
 MPI_GPU   ?=
 
-DFLAGS    += -DCUDA
 DFLAGS    += -DMPI_CHOLLA
-DFLAGS    += -DBLOCK
 DFLAGS    += -DPRECISION=2
 DFLAGS    += -DPPMP
 DFLAGS    += -DHLLC
diff --git a/builds/make.type.disk b/builds/make.type.disk
index a142a4756..47bb22829 100644
--- a/builds/make.type.disk
+++ b/builds/make.type.disk
@@ -1,37 +1,47 @@
-MPI_GPU = 
+MPI_GPU = -DMPI_GPU
 DFLAGS += -DPARTICLES
-DFLAGS += -DPARTICLES_CPU
-DFLAGS += -DONLY_PARTICLES
+#DFLAGS += -DPARTICLES_CPU
+DFLAGS += -DPARTICLES_GPU
+#DFLAGS += -DONLY_PARTICLES
 DFLAGS += -DPARTICLE_IDS
-DFLAGS += -DSINGLE_PARTICLE_MASS
+#DFLAGS += -DSINGLE_PARTICLE_MASS
+DFLAGS += -DPARTICLE_AGE
+DFLAGS += -DSUPERNOVA  #this flag requires PARTICLE_AGE, PARTICLE_IDS
+DFLAGS += -DANALYSIS
+#DFLAGS += -DPARTICLES_KDK
 
 
 DFLAGS += -DGRAVITY
+DFLAGS += -DGRAVITY_GPU
 # Use both -DSOR and -DPARIS_GALACTIC to run analytic test and compare solutions
-DFLAGS += -DSOR
+#DFLAGS += -DSOR
 DFLAGS += -DPARIS_GALACTIC
 DFLAGS += -DGRAVITY_ANALYTIC_COMP
+DFLAGS += -DGRAVITY_5_POINTS_GRADIENT
 
+#DFLAGS += -DSTATIC_GRAV
 
-DFLAGS    += -DCUDA
-DFLAGS    += -DMPI_CHOLLA 
-DFLAGS    += -DBLOCK
+DFLAGS    += -DMPI_CHOLLA
 DFLAGS    += -DPRECISION=2
-DFLAGS    += -DPPMP
+DFLAGS    += -DPLMC
 DFLAGS    += -DHLLC
 DFLAGS    += -DVL
 
-#DFLAGS += -DDISK_ICS
+DFLAGS += -DDISK_ICS
 
 DFLAGS    += -DDENSITY_FLOOR
 DFLAGS    += -DTEMPERATURE_FLOOR
+DFLAGS    += -DCOOLING_GPU
+#DFLAGS    += -DCLOUDY_COOL
 DFLAGS    += -DDE
 DFLAGS    += -DCPU_TIME
+DFLAGS    += -DAVERAGE_SLOW_CELLS
+DFLAGS    += -DHYDRO_GPU
 
-OUTPUT    ?=  -DOUTPUT -DHDF5
+OUTPUT    ?=  -DOUTPUT -DHDF5 -DSLICES -DPROJECTION
 DFLAGS    += $(OUTPUT)
 
-DFLAGS    += $(MPI_GPU)  
+DFLAGS    += $(MPI_GPU)
 
 DFLAGS += -DPARALLEL_OMP
 DFLAGS += -DN_OMP_THREADS=$(OMP_NUM_THREADS)
diff --git a/builds/make.type.dust b/builds/make.type.dust
new file mode 100644
index 000000000..1669a4077
--- /dev/null
+++ b/builds/make.type.dust
@@ -0,0 +1,46 @@
+#-- Default hydro + dust
+
+#-- separated output flag so that it can be overriden in target-specific
+#   for make check
+OUTPUT    ?=  -DOUTPUT -DHDF5
+
+MPI_GPU   ?=
+
+DFLAGS    += -DMPI_CHOLLA
+DFLAGS    += -DPRECISION=2
+DFLAGS    += -DPLMC
+DFLAGS    += -DHLLC
+
+DFLAGS    += -DDE
+DFLAGS    += -DAVERAGE_SLOW_CELLS
+DFLAGS    += -DTEMPERATURE_FLOOR
+DFLAGS    += -DDENSITY_FLOOR
+
+DFLAGS    += -DVL
+
+# Evolve additional scalars
+DFLAGS    += -DSCALAR
+DFLAGS    += -DSCALAR_FLOOR
+
+# Define dust macro
+DFLAGS    += -DDUST
+
+# Apply the cooling in the GPU from precomputed tables
+DFLAGS    += -DCOOLING_GPU
+DFLAGS    += -DCLOUDY_COOLING
+
+#Measure the Timing of the different stages
+#DFLAGS   += -DCPU_TIME
+
+DFLAGS    += -DSLICES
+DFLAGS    += -DPROJECTION
+
+DFLAGS    += $(OUTPUT)
+
+DFLAGS    += -DOUTPUT_ALWAYS
+
+#Select if the Hydro Conserved data will reside in the GPU
+#and the MPI transfers are done from the GPU
+#If not specified, MPI_GPU is off by default
+#This is set in the system make.host file
+DFLAGS    += $(MPI_GPU)
\ No newline at end of file
diff --git a/builds/make.type.hydro b/builds/make.type.hydro
index 5824e6deb..9e9b1d77c 100644
--- a/builds/make.type.hydro
+++ b/builds/make.type.hydro
@@ -1,14 +1,16 @@
 #-- Default hydro only build
 
-DFLAGS    += -DCUDA
 DFLAGS    += -DMPI_CHOLLA
 DFLAGS    += -DPRECISION=2
-DFLAGS    += -DPPMC
+DFLAGS    += -DPLMC
 DFLAGS    += -DHLLC
 
 # Integrator
+ifeq ($(findstring cosmology,$(TYPE)),cosmology)
 DFLAGS    += -DSIMPLE
-#DFLAGS    += -DVL
+else
+DFLAGS    += -DVL
+endif
 
 # Apply a density and temperature floor
 DFLAGS    += -DDENSITY_FLOOR
@@ -27,4 +29,3 @@ DFLAGS    += -DTEMPERATURE_FLOOR
 # Can also add -DSLICES and -DPROJECTIONS
 OUTPUT    ?=  -DOUTPUT -DHDF5
 DFLAGS    += $(OUTPUT)
-
diff --git a/builds/make.type.mhd b/builds/make.type.mhd
index 3f67ea88f..6348c173e 100644
--- a/builds/make.type.mhd
+++ b/builds/make.type.mhd
@@ -6,18 +6,14 @@ OUTPUT    ?=  -DOUTPUT -DHDF5
 
 MPI_GPU   ?=
 
-DFLAGS    += -DCUDA
 DFLAGS    += -DMPI_CHOLLA
 DFLAGS    += -DPRECISION=2
-DFLAGS    += -DPPMP
+DFLAGS    += -DPLMC
 DFLAGS    += -DHLLD
 DFLAGS    += -DMHD
 
-ifeq ($(findstring cosmology,$(TYPE)),cosmology)
-DFLAGS    += -DSIMPLE
-else
+# MHD only supports the Van Leer integrator
 DFLAGS    += -DVL
-endif
 
 # need this if using Disk_3D
 # DFLAGS += -DDISK_ICS
@@ -35,7 +31,7 @@ DFLAGS    += -DTEMPERATURE_FLOOR
 # Apply the cooling in the GPU from precomputed tables
 # DFLAGS    += -DCOOLING_GPU
 
-#Measure the Timing of the different stages
+# Measure the Timing of the different stages
 DFLAGS += -DCPU_TIME
 
 DFLAGS    += $(OUTPUT)
@@ -45,3 +41,12 @@ DFLAGS    += $(OUTPUT)
 #If not specified, MPI_GPU is off by default
 #This is set in the system make.host file
 DFLAGS    += $(MPI_GPU)
+
+# Disable CUDA error checking
+# DFLAGS += -DDISABLE_GPU_ERROR_CHECKING
+
+# NOTE: The following macros are to help facilitate debugging and should not be
+# used on scientific runs
+
+# Limit the number of steps to evolve.
+# DFLAGS += -DN_STEPS_LIMIT=1000
diff --git a/builds/make.type.rot_proj b/builds/make.type.rot_proj
index e6faa7514..76eea26d8 100644
--- a/builds/make.type.rot_proj
+++ b/builds/make.type.rot_proj
@@ -1,14 +1,13 @@
 #-- Default hydro only build with rotated projection
 
-DFLAGS    += -DCUDA
 DFLAGS    += -DMPI_CHOLLA
 DFLAGS    += -DPRECISION=2
-DFLAGS    += -DPPMC
+DFLAGS    += -DPLMC
 DFLAGS    += -DHLLC
 
 # Integrator
-DFLAGS    += -DSIMPLE
-#DFLAGS    += -DVL
+# DFLAGS    += -DSIMPLE
+DFLAGS    += -DVL
 
 # Apply a density and temperature floor
 DFLAGS    += -DDENSITY_FLOOR
@@ -28,4 +27,4 @@ DFLAGS    += -DTEMPERATURE_FLOOR
 OUTPUT    ?=  -DOUTPUT -DHDF5
 DFLAGS    += $(OUTPUT)
 
-DFLAGS    += -DROTATED_PROJECTION
\ No newline at end of file
+DFLAGS    += -DROTATED_PROJECTION
diff --git a/builds/make.type.static_grav b/builds/make.type.static_grav
index ffa15c4ee..2c17f7e8b 100644
--- a/builds/make.type.static_grav
+++ b/builds/make.type.static_grav
@@ -1,14 +1,13 @@
 #-- Default hydro only build with static_grav
 
-DFLAGS    += -DCUDA
 DFLAGS    += -DMPI_CHOLLA
 DFLAGS    += -DPRECISION=2
-DFLAGS    += -DPPMC
+DFLAGS    += -DPLMC
 DFLAGS    += -DHLLC
 
 # Integrator
-DFLAGS    += -DSIMPLE
-#DFLAGS    += -DVL
+# DFLAGS    += -DSIMPLE
+DFLAGS    += -DVL
 
 # Apply a density and temperature floor
 DFLAGS    += -DDENSITY_FLOOR
@@ -29,4 +28,3 @@ DFLAGS    += -DSTATIC_GRAV
 # Can also add -DSLICES and -DPROJECTIONS
 OUTPUT    ?=  -DOUTPUT -DHDF5
 DFLAGS    += $(OUTPUT)
-
diff --git a/builds/run_tests.sh b/builds/run_tests.sh
index bca41e411..0fc1ed629 100755
--- a/builds/run_tests.sh
+++ b/builds/run_tests.sh
@@ -54,6 +54,8 @@ setupTests ()
     return 1
   fi
 
+  builtin cd $CHOLLA_ROOT
+
   # Determine the hostname then use that to pick the right machine name and launch
   # command
   if [[ -n ${CHOLLA_MACHINE+x} ]]; then
@@ -94,10 +96,6 @@ setupTests ()
       ;;
   esac
 
-  # Clean the cholla directory
-  builtin cd $CHOLLA_ROOT
-  make clobber
-
   # Source the setup file
   source "${CHOLLA_ROOT}/builds/setup.${CHOLLA_MACHINE}${CHOLLA_COMPILER}.sh"
 }
@@ -110,7 +108,7 @@ buildCholla ()
 {
   echo -e "\nBuilding Cholla...\n"
   builtin cd $CHOLLA_ROOT
-  make -j TYPE=${CHOLLA_MAKE_TYPE}
+  make --jobs=$(nproc) TYPE=${CHOLLA_MAKE_TYPE} BUILD=${1} COVERAGE=${2}
 }
 # ==============================================================================
 
@@ -121,7 +119,7 @@ buildChollaTests ()
 {
   echo
   builtin cd $CHOLLA_ROOT
-  make -j TYPE=${CHOLLA_MAKE_TYPE} TEST=true
+  make --jobs=$(nproc) TYPE=${CHOLLA_MAKE_TYPE} TEST=true COVERAGE=${1}
 }
 # ==============================================================================
 
@@ -205,6 +203,51 @@ runTests ()
 }
 # ==============================================================================
 
+# ==============================================================================
+# This function generates a coverage report after the tests have been run.
+# The final report is a website in bin/html_coverage_report_${CHOLLA_MAKE_TYPE}
+chollaCoverage ()
+{
+  # Setup the names of files that we will use
+  local base_file="bin/coverage_base_${CHOLLA_MAKE_TYPE}.info"
+  local test_file="bin/coverage_test_${CHOLLA_MAKE_TYPE}.info"
+  local combined_file="bin/coverage_combined_${CHOLLA_MAKE_TYPE}.info"
+
+  # Generate the initial report with no coverage info. This is needed so that
+  # lcov knows about all the files, not just the ones that are tested
+  lcov --capture --initial --directory ${CHOLLA_ROOT}/src --output-file ${base_file}
+
+  # Now we get the actual coverage information
+  lcov --capture  --directory ${CHOLLA_ROOT}/src --output-file ${test_file}
+
+  # Then combine the the two coverage files so we know what changed, i.e. which
+  # lines were actually covered
+  lcov --add-tracefile ${base_file} --add-tracefile ${test_file} --output-file ${combined_file}
+
+  # Extract data from only the files within CHOLLA_ROOT. This should exclude any
+  # system or external libraries
+  lcov --extract ${combined_file} "${CHOLLA_ROOT}/*" --output-file ${combined_file}
+
+  # exclude_patterns=('*-tests.cpp') # Remove traces of the tests themselves
+  # # --remove TRACEFILE PATTERN = remove all things associated with PATTERN in TRACEFILE
+  # lcov --remove ${combined_file} "${exclude_patterns[@]}" --output-file ${combined_file}
+
+  # List the contents
+  lcov --list ${combined_file}
+
+  # Generate HTML report
+  genhtml ${combined_file} --output-directory bin/html_coverage_report_${CHOLLA_MAKE_TYPE}
+
+  # Combine all tracefiles together. Define the different make types then add
+  # the appropriate prefixes and suffices.
+  # build_types=(cosmology disk dust gravity hydro mhd particles)
+  # build_types=("${build_types[@]/#/--add-trace bin/coverage_combined_}")
+  # build_types=("${build_types[@]/%/.info}")
+  # eval "build_types=(${build_types[@]})"
+  # lcov "${build_types[@]}" --output-file bin/full_coverage_report.info
+}
+# ==============================================================================
+
 # ==============================================================================
 # Call all the functions required for setting up, building, and running tests
 #
@@ -214,15 +257,20 @@ runTests ()
 # argument is the value of COMPILER which does not occur for all setup scripts
 # \param[in] -g (optional) If set then download and build a local version of
 # GoogleTest to use instead of the machine default
+# \param[in] -d (optional) Build Cholla in debug mode
+# \param[in] -l (optional) Generate coverage reports when building and running Cholla
 buildAndRunTests ()
 {
   # Unset BUILD_GTEST so that subsequent runs aren't tied to what previous runs
   # did
   unset BUILD_GTEST
 
+  BUILD_MODE='OPTIMIZE'
+  CODE_COVERAGE='false'
+
   # Check arguments
   local OPTIND
-  while getopts "t:c:g" opt; do
+  while getopts "t:c:g:d:l" opt; do
     case $opt in
         t)  # Set the make type
             MAKE_TYPE_ARG="-t ${OPTARG}"
@@ -233,6 +281,12 @@ buildAndRunTests ()
         g)  # Build GoogleTest locally?
             BUILD_GTEST=true
             ;;
+        d)  # Build the debug version of Cholla?
+            BUILD_MODE='DEBUG'
+            ;;
+        l)  # Generate Code Coverage?
+            CODE_COVERAGE='true'
+            ;;
         \?)
             echo "Invalid option: -${OPTARG}" >&2
             return 1
@@ -244,13 +298,27 @@ buildAndRunTests ()
     esac
   done
 
+  # Run setup and check if it worked
+  setupTests $MAKE_TYPE_ARG $COMPILER_ARG
+  if [ $? -ne 0 ]; then
+    echo "setup failed"
+    exit 1
+  fi
+
+  # Clean the cholla directory
+  builtin cd $CHOLLA_ROOT
+  make clobber
+
   # Now we get to setting up and building
-  setupTests $MAKE_TYPE_ARG $COMPILER_ARG && \
   if [[ -n $BUILD_GTEST ]]; then
     buildGoogleTest
   fi
-  buildCholla  && \
-  buildChollaTests  && \
+  buildCholla $BUILD_MODE $CODE_COVERAGE && \
+  buildChollaTests $CODE_COVERAGE && \
   runTests
+
+  if [ $CODE_COVERAGE = "true" ]; then
+    chollaCoverage
+  fi
 }
 # ==============================================================================
diff --git a/builds/setup.c3po.gcc.sh b/builds/setup.c3po.gcc.sh
index 24fc6860d..d08360e6b 100755
--- a/builds/setup.c3po.gcc.sh
+++ b/builds/setup.c3po.gcc.sh
@@ -7,5 +7,5 @@ echo "mpicxx --version is: "
 mpicxx --version
 
 # export MPI_GPU="-DMPI_GPU"
-export F_OFFLOAD="-fopenmp -foffload=disable"
+export F_OFFLOAD="-fopenmp"
 export CHOLLA_ENVSET=1
diff --git a/builds/setup.crc.gcc.sh b/builds/setup.crc.gcc.sh
index 586dcbd00..7893b2875 100755
--- a/builds/setup.crc.gcc.sh
+++ b/builds/setup.crc.gcc.sh
@@ -9,5 +9,5 @@ echo "mpicxx --version is: "
 mpicxx --version
 
 # export MPI_GPU="-DMPI_GPU"
-export F_OFFLOAD="-fopenmp -foffload=disable"
+export F_OFFLOAD="-fopenmp"
 export CHOLLA_ENVSET=1
diff --git a/builds/setup.frontier.cce.sh b/builds/setup.frontier.cce.sh
index 4a22344d2..afb251680 100755
--- a/builds/setup.frontier.cce.sh
+++ b/builds/setup.frontier.cce.sh
@@ -15,3 +15,4 @@ export MPICH_GPU_SUPPORT_ENABLED=1
 export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}
 
 export CHOLLA_ENVSET=1
+export ROCFFT_RTC_CACHE_PATH=/dev/null
diff --git a/builds/setup.github.gcc.sh b/builds/setup.github.gcc.sh
index fd001f23a..a959b3cea 100755
--- a/builds/setup.github.gcc.sh
+++ b/builds/setup.github.gcc.sh
@@ -4,5 +4,4 @@
 #   source ./setup.c3po.gcc.sh
 
 # export MPI_GPU="-DMPI_GPU"
-export F_OFFLOAD="-fopenmp -foffload=disable"
 export CHOLLA_ENVSET=1
diff --git a/builds/setup.poplar.aomp.sh b/builds/setup.poplar.aomp.sh
index 7b83e5ab7..d692711fe 100755
--- a/builds/setup.poplar.aomp.sh
+++ b/builds/setup.poplar.aomp.sh
@@ -1,12 +1,11 @@
 #!/bin/bash
 
 module purge
-module load craype-x86-naples craype-network-infiniband 
+module load craype-x86-naples craype-network-infiniband
 module load shared slurm
 module use /home/users/twhite/share/modulefiles
 module load ompi/4.0.4-rocm-3.9 hdf5
 
-export OMPI_CC=$(which clang)
 export OMPI_CXX=$(which clang)
 
 export CHOLLA_MACHINE=poplar.aomp
diff --git a/builds/setup.summit.gcc.sh b/builds/setup.summit.gcc.sh
index 81a99dd36..0f15f6bfe 100755
--- a/builds/setup.summit.gcc.sh
+++ b/builds/setup.summit.gcc.sh
@@ -6,6 +6,5 @@
 #module load gcc/10.2.0 cuda/11.4.0 fftw hdf5 python
 module load gcc cuda fftw hdf5 python googletest/1.11.0
 
-#export F_OFFLOAD="-fopenmp -foffload=nvptx-none='-lm -Ofast'"
-export F_OFFLOAD="-fopenmp -foffload=disable"
+export F_OFFLOAD="-fopenmp"
 export CHOLLA_ENVSET=1
diff --git a/cholla-tests-data b/cholla-tests-data
index 66d592821..da5c3a309 160000
--- a/cholla-tests-data
+++ b/cholla-tests-data
@@ -1 +1 @@
-Subproject commit 66d5928213b495c2fef61b0653b90a25ae3aa7cf
+Subproject commit da5c3a309d5451fabdec27fd7942e6121bb9c277
diff --git a/docker/cuda/Dockerfile b/docker/cuda/Dockerfile
new file mode 100644
index 000000000..abecbe2c3
--- /dev/null
+++ b/docker/cuda/Dockerfile
@@ -0,0 +1,29 @@
+FROM nvidia/cuda:11.7.1-devel-ubuntu22.04
+# Needs to be devel, not base or runtime, to have nvcc
+# Ubuntu 22 is better than 18 because Ubuntu 22 default git is > 2.17
+# Github actions requires git > 2.17 so that cholla is pulled into a git repo
+# Which is required for the Makefile
+# With ubuntu 22.04 this grabs 2.34.1
+
+RUN apt-get -y update && apt install -y \
+    cmake \
+    git \
+    gnupg \
+    libgtest-dev \
+    libhdf5-serial-dev \
+    libopenmpi-dev \
+    openmpi-bin \
+    software-properties-common \
+    wget
+
+# Install Clang and Tools
+RUN wget https://apt.llvm.org/llvm.sh && \
+    chmod +x llvm.sh && \
+    echo "\n" | ./llvm.sh 15 all && \
+    find /usr/bin/ -name 'clang*15'  | sed -E 's/^(\/usr\/bin\/.*)(\-[0-9]*)$/ln -s -v \1\2 \1/' | xargs -d '\n' -n 1 bash -c
+
+# Needed by Cholla Makefile
+ENV CHOLLA_MACHINE=github
+ENV CUDA_ROOT=/usr/local/cuda-11/
+ENV HDF5_ROOT=/usr/lib/x86_64-linux-gnu/hdf5/serial/
+ENV MPI_ROOT=/usr/lib/x86_64-linux-gnu/openmpi/
diff --git a/docker/rocm/Dockerfile b/docker/rocm/Dockerfile
new file mode 100644
index 000000000..3a7eb66ed
--- /dev/null
+++ b/docker/rocm/Dockerfile
@@ -0,0 +1,36 @@
+FROM rocm/dev-ubuntu-20.04:5.2.3
+
+# Avoid annoying cmake -> tzdata install prompt
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get -y update && apt-get -y install \
+    cmake \
+    git \
+    gnupg \
+    hipfft \
+    libgtest-dev \
+    libhdf5-serial-dev \
+    libopenmpi-dev \
+    openmpi-bin \
+    rocfft \
+    software-properties-common \
+    wget
+
+# Needed to trick ROCm into thinking there's a GPU
+RUN echo "gfx90a" | sudo tee --append $(hipconfig -R)/bin/target.lst
+
+# Install rocRand
+RUN apt-get -y install rocrand
+
+# Install Clang and Tools
+# RUN wget https://apt.llvm.org/llvm.sh && \
+#     chmod +x llvm.sh && \
+#     echo "\n" | ./llvm.sh 15 all && \
+#     find /usr/bin/ -name 'clang*15'  | sed -E 's/^(\/usr\/bin\/.*)(\-[0-9]*)$/ln -s -v \1\2 \1/' | xargs -d '\n' -n 1 bash -c
+
+# Needed by Cholla Makefile
+ENV CHOLLA_MACHINE=github
+ENV HIPCONFIG=/opt/rocm-5.2.3
+ENV ROCM_PATH=/opt/rocm-5.2.3
+ENV HDF5_ROOT=/usr/lib/x86_64-linux-gnu/hdf5/serial
+ENV MPI_ROOT=/usr/lib/x86_64-linux-gnu/openmpi
diff --git a/docs/doxygen/Doxyfile b/docs/doxygen/Doxyfile
index 9c3acb19f..4fedbe262 100644
--- a/docs/doxygen/Doxyfile
+++ b/docs/doxygen/Doxyfile
@@ -908,7 +908,7 @@ FILE_PATTERNS          = *.c \
 # be searched for input files as well.
 # The default value is: NO.
 
-RECURSIVE              = NO
+RECURSIVE              = YES
 
 # The EXCLUDE tag can be used to specify files and/or directories that should be
 # excluded from the INPUT source files. This way you can easily exclude a
@@ -964,7 +964,7 @@ EXAMPLE_PATTERNS       = *
 # irrespective of the value of the RECURSIVE tag.
 # The default value is: NO.
 
-EXAMPLE_RECURSIVE      = NO
+EXAMPLE_RECURSIVE      = YES
 
 # The IMAGE_PATH tag can be used to specify one or more files or directories
 # that contain images that are to be included in the documentation (see the
diff --git a/examples/1D/123.txt b/examples/1D/123.txt
index 79a3b23a3..3f693baa6 100644
--- a/examples/1D/123.txt
+++ b/examples/1D/123.txt
@@ -26,6 +26,10 @@ zlen=1.0
 # type of boundary conditions
 xl_bcnd=3
 xu_bcnd=3
+yl_bcnd=0
+yu_bcnd=0
+zl_bcnd=0
+zu_bcnd=0
 # path to output directory
 outdir=./
 
diff --git a/examples/1D/Creasey_shock.txt b/examples/1D/Creasey_shock.txt
index f7d98d7dc..59821a945 100644
--- a/examples/1D/Creasey_shock.txt
+++ b/examples/1D/Creasey_shock.txt
@@ -26,6 +26,10 @@ zlen=3.08567758e18
 # type of boundary conditions
 xl_bcnd=3
 xu_bcnd=3
+yl_bcnd=0
+yu_bcnd=0
+zl_bcnd=0
+zu_bcnd=0
 # path to output directory
 outdir=./
 
diff --git a/examples/1D/Shu_Osher.txt b/examples/1D/Shu_Osher.txt
index 5d78eba7d..42d8a7ccb 100644
--- a/examples/1D/Shu_Osher.txt
+++ b/examples/1D/Shu_Osher.txt
@@ -19,15 +19,19 @@ gamma=1.4
 # name of initial conditions
 init=Shu_Osher
 # domain properties
-xmin=0.0
+xmin=-1.0
 ymin=0.0
 zmin=0.0
-xlen=1.0
+xlen=2.0
 ylen=1.0
 zlen=1.0
 # type of boundary conditions
 xl_bcnd=3
 xu_bcnd=3
+yl_bcnd=0
+yu_bcnd=0
+zl_bcnd=0
+zu_bcnd=0
 # path to output directory
 outdir=./
 
diff --git a/examples/1D/noh_1D.txt b/examples/1D/noh_1D.txt
index 3e9552295..d350c2479 100644
--- a/examples/1D/noh_1D.txt
+++ b/examples/1D/noh_1D.txt
@@ -14,7 +14,7 @@ tout=1.0
 # time interval for output
 outstep=1.0
 # name of initial conditions
-init=Riemann_1D
+init=Riemann
 # domain properties
 xmin=0.0
 ymin=0.0
@@ -25,6 +25,10 @@ zlen=1.0
 # type of boundary conditions
 xl_bcnd=3
 xu_bcnd=3
+yl_bcnd=0
+yu_bcnd=0
+zl_bcnd=0
+zu_bcnd=0
 # path to output directory
 outdir=./
 
diff --git a/examples/1D/sound_wave.txt b/examples/1D/sound_wave.txt
index c6555c662..13c6f8d05 100644
--- a/examples/1D/sound_wave.txt
+++ b/examples/1D/sound_wave.txt
@@ -25,23 +25,26 @@ zlen=1.0
 # type of boundary conditions
 xl_bcnd=1
 xu_bcnd=1
+yl_bcnd=0
+yu_bcnd=0
+zl_bcnd=0
+zu_bcnd=0
 # path to output directory
 outdir=./
 
 #################################################
 # Parameters for linear wave problems
-# initial density 
+# initial density
 rho=1.0
-# velocity in the x direction 
+# velocity in the x direction
 vx=0
 # velocity in the y direction
 vy=0
 # velocity in the z direction
 vz=0
-# initial pressure 
+# initial pressure
 P=0.6
 # amplitude of perturbing oscillations
 A=1e-4
 # value of gamma
 gamma=1.666666666666667
-
diff --git a/examples/1D/square_wave.txt b/examples/1D/square_wave.txt
index d33805c15..d22282a66 100644
--- a/examples/1D/square_wave.txt
+++ b/examples/1D/square_wave.txt
@@ -26,6 +26,10 @@ zlen=1.0
 # type of boundary conditions
 xl_bcnd=1
 xu_bcnd=1
+yl_bcnd=0
+yu_bcnd=0
+zl_bcnd=0
+zu_bcnd=0
 # path to output directory
 outdir=./
 
diff --git a/examples/1D/stationary.txt b/examples/1D/stationary.txt
index 28941e868..746592847 100644
--- a/examples/1D/stationary.txt
+++ b/examples/1D/stationary.txt
@@ -27,6 +27,10 @@ zlen=1.0
 # type of boundary conditions
 xl_bcnd=3
 xu_bcnd=3
+yl_bcnd=0
+yu_bcnd=0
+zl_bcnd=0
+zu_bcnd=0
 # path to output directory
 outdir=./
 
diff --git a/examples/1D/strong_shock.txt b/examples/1D/strong_shock.txt
index 1726cf316..ff99eab23 100644
--- a/examples/1D/strong_shock.txt
+++ b/examples/1D/strong_shock.txt
@@ -25,6 +25,10 @@ zlen=1.0
 # type of boundary conditions
 xl_bcnd=3
 xu_bcnd=3
+yl_bcnd=0
+yu_bcnd=0
+zl_bcnd=0
+zu_bcnd=0
 # path to output directory
 outdir=./
 
diff --git a/examples/1D/test_3.txt b/examples/1D/test_3.txt
index 60997270c..3eff8abcc 100644
--- a/examples/1D/test_3.txt
+++ b/examples/1D/test_3.txt
@@ -26,6 +26,10 @@ zlen=1.0
 # type of boundary conditions
 xl_bcnd=3
 xu_bcnd=3
+yl_bcnd=0
+yu_bcnd=0
+zl_bcnd=0
+zu_bcnd=0
 # path to output directory
 outdir=./
 
diff --git a/examples/1D/trac_pen.txt b/examples/1D/trac_pen.txt
index 3c0081e5a..a24bf7167 100644
--- a/examples/1D/trac_pen.txt
+++ b/examples/1D/trac_pen.txt
@@ -26,6 +26,10 @@ zlen=1.0
 # type of boundary conditions
 xl_bcnd=1
 xu_bcnd=1
+yl_bcnd=0
+yu_bcnd=0
+zl_bcnd=0
+zu_bcnd=0
 # path to output directory
 outdir=./
 
diff --git a/examples/1D/two_shocks.txt b/examples/1D/two_shocks.txt
index a998bae46..c1ac4616a 100644
--- a/examples/1D/two_shocks.txt
+++ b/examples/1D/two_shocks.txt
@@ -26,6 +26,10 @@ zlen=1.0
 # type of boundary conditions
 xl_bcnd=3
 xu_bcnd=3
+yl_bcnd=0
+yu_bcnd=0
+zl_bcnd=0
+zu_bcnd=0
 # path to output directory
 outdir=./
 
diff --git a/examples/2D/Gresho.txt b/examples/2D/Gresho.txt
index cc645431d..6595c5695 100644
--- a/examples/2D/Gresho.txt
+++ b/examples/2D/Gresho.txt
@@ -17,6 +17,8 @@ outstep=0.05
 gamma=1.4
 # name of initial conditions
 init=Gresho
+# static gravity flag
+custom_grav=1
 # domain properties
 xmin=-0.5
 ymin=-0.5
diff --git a/examples/2D/Noh_2D.txt b/examples/2D/Noh_2D.txt
index 0e43af07d..5223983d5 100644
--- a/examples/2D/Noh_2D.txt
+++ b/examples/2D/Noh_2D.txt
@@ -29,6 +29,8 @@ xl_bcnd=2
 xu_bcnd=4
 yl_bcnd=2
 yu_bcnd=4
+zl_bcnd=0
+zu_bcnd=0
 custom_bcnd=noh
 # path to output directory
 outdir=./
diff --git a/examples/2D/Rayleigh_Taylor.txt b/examples/2D/Rayleigh_Taylor.txt
index 3cf87dbea..919e654e1 100644
--- a/examples/2D/Rayleigh_Taylor.txt
+++ b/examples/2D/Rayleigh_Taylor.txt
@@ -17,6 +17,8 @@ outstep=0.05
 gamma=1.4
 # name of initial conditions
 init=Rayleigh_Taylor
+#static gravity flag
+custom_grav=2
 # domain properties
 xmin=0.0
 ymin=0.0
diff --git a/examples/2D/disk.txt b/examples/2D/disk.txt
index 3dd0ce821..86397f6d9 100644
--- a/examples/2D/disk.txt
+++ b/examples/2D/disk.txt
@@ -17,6 +17,8 @@ outstep=2185.9
 gamma=1.001
 # name of initial conditions
 init=Disk_2D
+# static gravity flag
+custom_grav=3
 # domain properties
 xmin=-20
 ymin=-20
diff --git a/examples/2D/sod.txt b/examples/2D/sod.txt
index 27df1f256..1f60eab77 100644
--- a/examples/2D/sod.txt
+++ b/examples/2D/sod.txt
@@ -27,6 +27,8 @@ xl_bcnd=3
 xu_bcnd=3
 yl_bcnd=3
 yu_bcnd=3
+zl_bcnd=0
+zu_bcnd=0
 # path to output directory
 outdir=./
 
diff --git a/examples/2D/sound_wave.txt b/examples/2D/sound_wave.txt
index d69b1270f..109eb8050 100644
--- a/examples/2D/sound_wave.txt
+++ b/examples/2D/sound_wave.txt
@@ -27,24 +27,24 @@ xl_bcnd=1
 xu_bcnd=1
 yl_bcnd=1
 yu_bcnd=1
+zl_bcnd=0
+zu_bcnd=0
 # path to output directory
-#outdir=outputs/
 outdir=./
 
 #################################################
 # Parameters for linear wave problems
-# initial density 
+# initial density
 rho=1.0
-# velocity in the x direction 
+# velocity in the x direction
 vx=0
 # velocity in the y direction
 vy=0
 # velocity in the z direction
 vz=0
-# initial pressure 
+# initial pressure
 P=0.6
 # amplitude of perturbing oscillations
 A=1e-4
 # value of gamma
 gamma=1.666666666666667
-
diff --git a/examples/3D/Brio_and_Wu.txt b/examples/3D/Brio_and_Wu.txt
index a742ae207..c1a9fe387 100644
--- a/examples/3D/Brio_and_Wu.txt
+++ b/examples/3D/Brio_and_Wu.txt
@@ -6,11 +6,11 @@
 
 ################################################
 # number of grid cells in the x dimension
-nx=32
+nx=256
 # number of grid cells in the y dimension
-ny=32
+ny=256
 # number of grid cells in the z dimension
-nz=32
+nz=256
 # final output time
 tout=0.1
 # time interval for output
@@ -68,5 +68,5 @@ Bz_r=0.0
 # location of initial discontinuity
 diaph=0.5
 # value of gamma
-gamma=2
+gamma=2.0
 
diff --git a/examples/3D/Dai_and_Woodward.txt b/examples/3D/Dai_and_Woodward.txt
index 64c5351e6..a266cbb66 100644
--- a/examples/3D/Dai_and_Woodward.txt
+++ b/examples/3D/Dai_and_Woodward.txt
@@ -7,11 +7,11 @@
 
 ################################################
 # number of grid cells in the x dimension
-nx=32
+nx=256
 # number of grid cells in the y dimension
-ny=32
+ny=256
 # number of grid cells in the z dimension
-nz=32
+nz=256
 # final output time
 tout=0.2
 # time interval for output
@@ -43,28 +43,28 @@ outdir=./
 # density of left state
 rho_l=1.08
 # velocity of left state
-vx_l=0.0
-vy_l=0.0
-vz_l=0.0
+vx_l=1.2
+vy_l=0.01
+vz_l=0.5
 # pressure of left state
-P_l=1.0
+P_l=0.95
 # Magnetic field of the left state
-Bx_l=14.17963081
-By_l=12.76166773
-Bz_l=7.0898154
+Bx_l=0.5641895835477563
+By_l=1.0155412503859613
+Bz_l=0.5641895835477563
 
 # density of right state
 rho_r=1.0
 # velocity of right state
 vx_r=0.0
 vy_r=0.0
-vz_r=1.0
+vz_r=0.0
 # pressure of right state
-P_r=0.2
+P_r=1.0
 # Magnetic field of the right state
-Bx_r=14.17963081
-By_r=14.17963081
-Bz_r=7.0898154
+Bx_r=0.5641895835477563
+By_r=1.1283791670955126
+Bz_r=0.5641895835477563
 
 # location of initial discontinuity
 diaph=0.5
diff --git a/examples/3D/KH_res_ind_3D.txt b/examples/3D/KH_res_ind_3D.txt
index ab846867a..2ebe6cda0 100644
--- a/examples/3D/KH_res_ind_3D.txt
+++ b/examples/3D/KH_res_ind_3D.txt
@@ -10,7 +10,7 @@ ny=128
 # number of grid cells in the z dimension
 nz=128
 # final output time
-tout=5.0
+tout=3.0
 # time interval for output
 outstep=0.01
 # value of gamma
diff --git a/examples/3D/Ryu_and_Jones_1a.txt b/examples/3D/Ryu_and_Jones_1a.txt
new file mode 100644
index 000000000..c0c73cced
--- /dev/null
+++ b/examples/3D/Ryu_and_Jones_1a.txt
@@ -0,0 +1,74 @@
+#
+# Parameter File for 3D Ryu & Jones MHD shock tube 1a.
+# Citation: Ryu & Jones 1995 "Numerical Magnetohydrodynamics in Astrophysics:
+# Algorithms and Tests for One-Dimensional Flow"
+#
+# Note: There are many shock tubes in this paper. This settings file is
+# specifically for shock tube 1a
+#
+
+################################################
+# number of grid cells in the x dimension
+nx=256
+# number of grid cells in the y dimension
+ny=256
+# number of grid cells in the z dimension
+nz=256
+# final output time
+tout=0.08
+# time interval for output
+outstep=0.08
+# name of initial conditions
+init=Riemann
+
+# domain properties
+xmin=0.0
+ymin=0.0
+zmin=0.0
+xlen=1.0
+ylen=1.0
+zlen=1.0
+
+# type of boundary conditions
+xl_bcnd=3
+xu_bcnd=3
+yl_bcnd=3
+yu_bcnd=3
+zl_bcnd=3
+zu_bcnd=3
+
+# path to output directory
+outdir=./
+
+#################################################
+# Parameters for 1D Riemann problems
+# density of left state
+rho_l=1.0
+# velocity of left state
+vx_l=10.0
+vy_l=0.0
+vz_l=0.0
+# pressure of left state
+P_l=20.0
+# Magnetic field of the left state
+Bx_l=1.4104739588693909
+By_l=1.4104739588693909
+Bz_l=0.0
+
+# density of right state
+rho_r=1.0
+# velocity of right state
+vx_r=-10.0
+vy_r=0.0
+vz_r=0.0
+# pressure of right state
+P_r=1.0
+# Magnetic field of the right state
+Bx_r=1.4104739588693909
+By_r=1.4104739588693909
+Bz_r=0.0
+
+# location of initial discontinuity
+diaph=0.5
+# value of gamma
+gamma=1.6666666666666667
diff --git a/examples/3D/Ryu_and_Jones_4d.txt b/examples/3D/Ryu_and_Jones_4d.txt
index 68fcbbbb3..6596c2b01 100644
--- a/examples/3D/Ryu_and_Jones_4d.txt
+++ b/examples/3D/Ryu_and_Jones_4d.txt
@@ -9,11 +9,11 @@
 
 ################################################
 # number of grid cells in the x dimension
-nx=32
+nx=64
 # number of grid cells in the y dimension
-ny=32
+ny=64
 # number of grid cells in the z dimension
-nz=32
+nz=64
 # final output time
 tout=0.16
 # time interval for output
diff --git a/examples/3D/Spherical_Collapse.txt b/examples/3D/Spherical_Collapse.txt
index 8fad21920..739661216 100644
--- a/examples/3D/Spherical_Collapse.txt
+++ b/examples/3D/Spherical_Collapse.txt
@@ -32,6 +32,4 @@ yu_bcnd=1
 zl_bcnd=1
 zu_bcnd=1
 # path to output directory
-outdir=/data/groups/comp-astro/bruno/cosmo_sims/sphere_collapse/output_files/
-#outdir=/raid/bruno/data/cosmo_sims/cholla_pm/sphere_collapse/
-#outdir=/gpfs/alpine/scratch/bvilasen/ast149/sphere_collapse/output_files/
+outdir=./
diff --git a/examples/3D/Spherical_Overpressure.txt b/examples/3D/Spherical_Overpressure.txt
index 7fec56a3b..0e77c4452 100644
--- a/examples/3D/Spherical_Overpressure.txt
+++ b/examples/3D/Spherical_Overpressure.txt
@@ -32,5 +32,4 @@ yu_bcnd=1
 zl_bcnd=1
 zu_bcnd=1
 # path to output directory
-#outdir=/gpfs/alpine/scratch/bvilasen/ast149/sphere_explosion/output_files/
-outdir=/raid/bruno/data/cosmo_sims/cholla_pm/sphere_explosion/
+outdir=./
diff --git a/examples/3D/Uniform.txt b/examples/3D/Uniform.txt
index 84fd900f6..e08e76dba 100644
--- a/examples/3D/Uniform.txt
+++ b/examples/3D/Uniform.txt
@@ -32,4 +32,4 @@ yu_bcnd=1
 zl_bcnd=1
 zu_bcnd=1
 # path to output directory
-outdir=/raid/bruno/data/cosmo_sims/cholla_pm/uniform/
+outdir=./
diff --git a/examples/3D/advecting_field_loop.txt b/examples/3D/advecting_field_loop.txt
new file mode 100644
index 000000000..eca9c382e
--- /dev/null
+++ b/examples/3D/advecting_field_loop.txt
@@ -0,0 +1,55 @@
+#
+# Parameter File for an MHD Advecting Field Loop as defined in
+# [Gardiner & Stone 2008](https://ui.adsabs.harvard.edu/abs/2008JCoPh.227.4123G/abstract)
+#
+
+################################################
+# number of grid cells in the x dimension
+nx=128
+# number of grid cells in the y dimension
+ny=128
+# number of grid cells in the z dimension
+nz=256
+# final output time
+tout=2.0
+# time interval for output
+outstep=2.0
+# name of initial conditions
+init=Advecting_Field_Loop
+# domain properties
+xmin=-0.5
+ymin=-0.5
+zmin=-1.0
+xlen=1.0
+ylen=1.0
+zlen=2.0
+# type of boundary conditions
+xl_bcnd=1
+xu_bcnd=1
+yl_bcnd=1
+yu_bcnd=1
+zl_bcnd=1
+zu_bcnd=1
+# path to output directory
+outdir=./
+
+#################################################
+# Parameters for advecting field loop problem
+# initial density
+rho=1.0
+# velocity in the x direction
+vx=1.0
+# velocity in the y direction
+vy=1.0
+# velocity in the z direction
+vz=2.0
+# initial pressure
+P=1.0
+# amplitude of the loop/magnetic field background value
+A=0.001
+# Radius of the Loop
+radius=0.3
+
+# value of gamma
+gamma=1.666666666666667
+
diff --git a/examples/3D/alfven_wave.txt b/examples/3D/alfven_wave.txt
new file mode 100644
index 000000000..bfacbc968
--- /dev/null
+++ b/examples/3D/alfven_wave.txt
@@ -0,0 +1,71 @@
+#
+# Parameter File for MHD Alfven Wave
+# See [this blog post](https://robertcaddy.com/posts/Classes-and-bugfixing-6/)
+# for details on each wave
+# The right eigenvector for this wave is:
+# (1/3) * [0, 0, +/-1, -/+2*sqrt(2), 0, -1, 2*sqrt(2), 0]
+# The terms with two sign options: use the left one for right moving waves and
+# the right one for left moving waves
+#
+
+################################################
+# number of grid cells in the x dimension
+nx=256
+# number of grid cells in the y dimension
+ny=256
+# number of grid cells in the z dimension
+nz=256
+# final output time
+tout=1.0
+# time interval for output
+outstep=1.0
+# name of initial conditions
+init=Linear_Wave
+# domain properties
+xmin=0.0
+ymin=0.0
+zmin=0.0
+xlen=1.0
+ylen=1.0
+zlen=1.0
+# type of boundary conditions
+xl_bcnd=1
+xu_bcnd=1
+yl_bcnd=1
+yu_bcnd=1
+zl_bcnd=1
+zu_bcnd=1
+# path to output directory
+outdir=./
+
+#################################################
+# Parameters for linear wave problems
+# initial density
+rho=1.0
+# velocity in the x direction
+vx=0
+# velocity in the y direction
+vy=0
+# velocity in the z direction
+vz=0
+# initial pressure
+P=0.6
+# magnetic field in the x direction
+Bx=1
+# magnetic field in the y direction
+By=1.5
+# magnetic field in the z direction
+Bz=0
+# amplitude of perturbing oscillations
+A=1e-6
+# value of gamma
+gamma=1.666666666666667
+# The right eigenvectors to set the wave properly
+rEigenVec_rho=0
+rEigenVec_MomentumX=0
+rEigenVec_MomentumY=0
+rEigenVec_MomentumZ=-1
+rEigenVec_Bx=0
+rEigenVec_By=0
+rEigenVec_Bz=1
+rEigenVec_E=0
diff --git a/examples/3D/circularly_polarized_alfven_wave.txt b/examples/3D/circularly_polarized_alfven_wave.txt
new file mode 100644
index 000000000..193f1ac33
--- /dev/null
+++ b/examples/3D/circularly_polarized_alfven_wave.txt
@@ -0,0 +1,48 @@
+#
+# Parameter File for the circularly polarized Alfven Wave
+# See [Gardiner & Stone 2008](https://arxiv.org/abs/0712.2634) pages 4134-4135
+# for details.
+#
+
+################################################
+# number of grid cells in the x dimension
+nx=64
+# number of grid cells in the y dimension
+ny=32
+# number of grid cells in the z dimension
+nz=32
+# final output time
+tout=1.0
+# time interval for output
+outstep=1.0
+# name of initial conditions
+init=Circularly_Polarized_Alfven_Wave
+# domain properties
+xmin=0.0
+ymin=0.0
+zmin=0.0
+xlen=3.0
+ylen=1.5
+zlen=1.5
+# type of boundary conditions
+xl_bcnd=1
+xu_bcnd=1
+yl_bcnd=1
+yu_bcnd=1
+zl_bcnd=1
+zu_bcnd=1
+# path to output directory
+outdir=./
+
+#################################################
+# Parameters for linear wave problems
+# Polarization. 1 = right polarized, -1 = left polarized
+polarization=1.0
+# velocity in the x direction. 0 for moving wave, -1 for standing wave
+vx=0.0
+# pitch angle
+pitch=0.72972765622696634
+# yaw angle
+yaw=1.1071487177940904
+# value of gamma
+gamma=1.666666666666667
diff --git a/examples/3D/constant.txt b/examples/3D/constant.txt
index ca3b411e2..871fbb7b7 100644
--- a/examples/3D/constant.txt
+++ b/examples/3D/constant.txt
@@ -42,9 +42,9 @@ vz=0
 # pressure
 P=1.380658e-5
 # Magnetic Field
-Bx=0.0
-By=0.0
-Bz=0.0
+Bx=1.0e-5
+By=2.0e-5
+Bz=3.0e-5
 # value of gamma
 gamma=1.666666667
 
diff --git a/examples/3D/fast_magnetosonic.txt b/examples/3D/fast_magnetosonic.txt
new file mode 100644
index 000000000..bc134a79a
--- /dev/null
+++ b/examples/3D/fast_magnetosonic.txt
@@ -0,0 +1,71 @@
+#
+# Parameter File for MHD fast magnetosonic wave
+# See [this blog post](https://robertcaddy.com/posts/Classes-and-bugfixing-6/)
+# for details on each wave.
+# The right eigenvector for this wave is:
+# (1/(6*sqrt(5))) * [6, +/-12, -/+4*sqrt(2), -/+2, 0,  8*sqrt(2), 4, 27]
+# The terms with two sign options: use the left one for right moving waves and
+# the right one for left moving waves
+#
+
+################################################
+# number of grid cells in the x dimension
+nx=256
+# number of grid cells in the y dimension
+ny=256
+# number of grid cells in the z dimension
+nz=256
+# final output time
+tout=0.5
+# time interval for output
+outstep=0.5
+# name of initial conditions
+init=Linear_Wave
+# domain properties
+xmin=0.0
+ymin=0.0
+zmin=0.0
+xlen=1.0
+ylen=1.0
+zlen=1.0
+# type of boundary conditions
+xl_bcnd=1
+xu_bcnd=1
+yl_bcnd=1
+yu_bcnd=1
+zl_bcnd=1
+zu_bcnd=1
+# path to output directory
+outdir=./
+
+#################################################
+# Parameters for linear wave problems
+# initial density
+rho=1.0
+# velocity in the x direction
+vx=0
+# velocity in the y direction
+vy=0
+# velocity in the z direction
+vz=0
+# initial pressure
+P=0.6
+# magnetic field in the x direction
+Bx=1
+# magnetic field in the y direction
+By=1.5
+# magnetic field in the z direction
+Bz=0
+# amplitude of perturbing oscillations
+A=1e-6
+# value of gamma
+gamma=1.666666666666667
+# The right eigenvectors to set the wave properly
+rEigenVec_rho=0.4472135954999579
+rEigenVec_MomentumX=0.8944271909999159
+rEigenVec_MomentumY=-0.4472135954999579
+rEigenVec_MomentumZ=0.0
+rEigenVec_Bx=0.0
+rEigenVec_By=0.8944271909999159
+rEigenVec_Bz=0.0
+rEigenVec_E=2.0124611797498106
diff --git a/examples/3D/mhd_blast.txt b/examples/3D/mhd_blast.txt
new file mode 100644
index 000000000..5d078f674
--- /dev/null
+++ b/examples/3D/mhd_blast.txt
@@ -0,0 +1,61 @@
+#
+# Parameter File for the MHD Blast wavelength
+# See [Stone & Gardiner 2009](https://ui.adsabs.harvard.edu/abs/2009NewA...14..139S/abstract) for details.
+#
+
+################################################
+# number of grid cells in the x dimension
+nx=200
+# number of grid cells in the y dimension
+ny=300
+# number of grid cells in the z dimension
+nz=200
+# final output time
+tout=0.2
+# time interval for output
+outstep=0.2
+# name of initial conditions
+init=MHD_Spherical_Blast
+# domain properties
+xmin=-0.5
+ymin=-0.75
+zmin=-0.5
+xlen=1.0
+ylen=1.5
+zlen=1.0
+# type of boundary conditions
+xl_bcnd=1
+xu_bcnd=1
+yl_bcnd=1
+yu_bcnd=1
+zl_bcnd=1
+zu_bcnd=1
+# path to output directory
+outdir=./
+
+#################################################
+# Parameters for MHD Blast Wave problem
+
+# initial density
+rho=1.0
+# velocity in the x direction
+vx=0.0
+# velocity in the y direction
+vy=0.0
+# velocity in the z direction
+vz=0.0
+# initial pressure outside the blast zone
+P=0.1
+# initial pressure inside the blast zone. Note that the paper says this should be 100, that is a typo
+P_blast=10.0
+# The radius of the blast zone
+radius=0.1
+# magnetic field in the x direction. Equal to 1/sqrt(2)
+Bx=0.70710678118654746
+# magnetic field in the y direction. Equal to 1/sqrt(2)
+By=0.70710678118654746
+# magnetic field in the z direction
+Bz=0.0
+
+# value of gamma
+gamma=1.666666666666667
diff --git a/examples/3D/mhd_contact_wave.txt b/examples/3D/mhd_contact_wave.txt
new file mode 100644
index 000000000..0ff7e7989
--- /dev/null
+++ b/examples/3D/mhd_contact_wave.txt
@@ -0,0 +1,71 @@
+#
+# Parameter File for MHD contact wave
+# See [this blog post](https://robertcaddy.com/posts/Classes-and-bugfixing-6/)
+# for details on each wave
+# The right eigenvector for this wave is:
+# (1/2) * [2, +/-2, 0, 0, 0, 0, 0, 1]
+# The terms with two sign options: use the left one for right moving waves and
+# the right one for left moving waves
+#
+
+################################################
+# number of grid cells in the x dimension
+nx=256
+# number of grid cells in the y dimension
+ny=256
+# number of grid cells in the z dimension
+nz=256
+# final output time
+tout=1.0
+# time interval for output
+outstep=1.0
+# name of initial conditions
+init=Linear_Wave
+# domain properties
+xmin=0.0
+ymin=0.0
+zmin=0.0
+xlen=1.0
+ylen=1.0
+zlen=1.0
+# type of boundary conditions
+xl_bcnd=1
+xu_bcnd=1
+yl_bcnd=1
+yu_bcnd=1
+zl_bcnd=1
+zu_bcnd=1
+# path to output directory
+outdir=./
+
+#################################################
+# Parameters for linear wave problems
+# initial density
+rho=1.0
+# velocity in the x direction
+vx=1
+# velocity in the y direction
+vy=0
+# velocity in the z direction
+vz=0
+# initial pressure
+P=0.6
+# magnetic field in the x direction
+Bx=1
+# magnetic field in the y direction
+By=1.5
+# magnetic field in the z direction
+Bz=0
+# amplitude of perturbing oscillations
+A=1e-6
+# value of gamma
+gamma=1.666666666666667
+# The right eigenvectors to set the wave properly
+rEigenVec_rho=1.0
+rEigenVec_MomentumX=1.0
+rEigenVec_MomentumY=0.0
+rEigenVec_MomentumZ=0.0
+rEigenVec_Bx=0.0
+rEigenVec_By=0.0
+rEigenVec_Bz=0.0
+rEigenVec_E=0.5
diff --git a/examples/3D/orszag_tang_vortex.txt b/examples/3D/orszag_tang_vortex.txt
new file mode 100644
index 000000000..9d8050073
--- /dev/null
+++ b/examples/3D/orszag_tang_vortex.txt
@@ -0,0 +1,42 @@
+#
+# Parameter File for the Orszag-Tang Vortex
+# See [Gardiner & Stone 2008](https://arxiv.org/abs/0712.2634)
+#
+
+################################################
+# number of grid cells in the x dimension
+nx=128
+# number of grid cells in the y dimension
+ny=128
+# number of grid cells in the z dimension
+nz=128
+# final output time
+tout=0.5
+# time interval for output
+outstep=0.5
+# name of initial conditions
+init=Orszag_Tang_Vortex
+# domain properties
+xmin=0.0
+ymin=0.0
+zmin=0.0
+xlen=1.0
+ylen=1.0
+zlen=1.0
+# type of boundary conditions
+xl_bcnd=1
+xu_bcnd=1
+yl_bcnd=1
+yu_bcnd=1
+zl_bcnd=1
+zu_bcnd=1
+# path to output directory
+outdir=./
+
+#################################################
+# Parameters for Orszag-Tang Vortex. This problem is defined for a specific set
+# of initial conditions which have been hard coded into the initial conditions
+# function. The only thing that needs set here is the adiabatic index
+
+# value of gamma
+gamma=1.666666666666667
diff --git a/examples/3D/slow_magnetosonic.txt b/examples/3D/slow_magnetosonic.txt
new file mode 100644
index 000000000..960952b5f
--- /dev/null
+++ b/examples/3D/slow_magnetosonic.txt
@@ -0,0 +1,72 @@
+#
+# Parameter File for MHD slow magnetosonic wave
+# See [this blog post](https://robertcaddy.com/posts/Classes-and-bugfixing-6/)
+# for details on each wave
+# The right eigenvector for this wave is:
+# (1/(6*sqrt(5))) * [12, +/-6, +/-8*sqrt(2), +/-4, 0, -4*sqrt(2), -2, 9]
+# The terms with two sign options: use the left one for right moving waves and
+# the right one for left moving waves
+#
+
+################################################
+# number of grid cells in the x dimension
+nx=256
+# number of grid cells in the y dimension
+ny=256
+# number of grid cells in the z dimension
+nz=256
+# final output time
+tout=2.0
+# time interval for output
+outstep=2.0
+# name of initial conditions
+init=Linear_Wave
+# domain properties
+xmin=0.0
+ymin=0.0
+zmin=0.0
+xlen=1.0
+ylen=1.0
+zlen=1.0
+# type of boundary conditions
+xl_bcnd=1
+xu_bcnd=1
+yl_bcnd=1
+yu_bcnd=1
+zl_bcnd=1
+zu_bcnd=1
+# path to output directory
+outdir=./
+
+#################################################
+# Parameters for linear wave problems
+# initial density
+rho=1.0
+# velocity in the x direction
+vx=0
+# velocity in the y direction
+vy=0
+# velocity in the z direction
+vz=0
+# initial pressure
+P=0.6
+# magnetic field in the x direction
+Bx=1
+# magnetic field in the y direction
+By=1.5
+# magnetic field in the z direction
+Bz=0
+# amplitude of perturbing oscillations
+A=1e-6
+# value of gamma
+gamma=1.666666666666667
+# The right eigenvectors to set the wave properly
+rEigenVec_rho=0.8944271909999159
+rEigenVec_MomentumX=0.4472135954999579
+rEigenVec_MomentumY=0.8944271909999159
+rEigenVec_MomentumZ=0.0
+rEigenVec_Bx=0.0
+rEigenVec_By=-0.4472135954999579
+rEigenVec_Bz=0.0
+rEigenVec_E=0.6708203932499369
+
diff --git a/examples/3D/sound_wave.txt b/examples/3D/sound_wave.txt
index 0f3866226..6c226c0ab 100644
--- a/examples/3D/sound_wave.txt
+++ b/examples/3D/sound_wave.txt
@@ -34,18 +34,17 @@ outdir=./
 
 #################################################
 # Parameters for linear wave problems
-# initial density 
+# initial density
 rho=1.0
-# velocity in the x direction 
+# velocity in the x direction
 vx=0
 # velocity in the y direction
 vy=0
 # velocity in the z direction
 vz=0
-# initial pressure 
+# initial pressure
 P=0.6
 # amplitude of perturbing oscillations
 A=1e-4
 # value of gamma
 gamma=1.666666666666667
-
diff --git a/examples/scripts/parameter_file.txt b/examples/scripts/parameter_file.txt
index 48652cfe1..1cf3a08ad 100644
--- a/examples/scripts/parameter_file.txt
+++ b/examples/scripts/parameter_file.txt
@@ -10,8 +10,7 @@ ny=256
 # number of grid cells in the z dimension
 nz=256
 # final output time
-tout=3000
-#tout=3
+tout=200
 # time interval for output
 outstep=100
 n_hydro=1
@@ -23,16 +22,15 @@ n_rotated_projection=5
 gamma=1.66666667
 # name of initial conditions
 init=Disk_3D_particles
-bc_potential_type=1
 #init=Disk_3D
-#nfull=100
+bc_potential_type=1
 # domain properties
-xmin=-15
-ymin=-15
-zmin=-15
-xlen=30
-ylen=30
-zlen=30
+xmin=-2
+ymin=-2
+zmin=-2
+xlen=4
+ylen=4
+zlen=4
 # type of boundary conditions
 xl_bcnd=3
 xu_bcnd=3
@@ -52,3 +50,4 @@ flag_delta=2
 ddelta_dt=-0.001
 # path to output directory
 outdir=./raw/
+prng_seed=42
diff --git a/extras/submit_job_lux b/extras/submit_job_lux
deleted file mode 100644
index 39c919f2d..000000000
--- a/extras/submit_job_lux
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=cholla_test      # Job name
-#SBATCH --partition=gpuq             # queue for job submission
-#SBATCH --mail-type=END,FAIL         # Mail events (NONE, BEGIN, END, FAIL, ALL)
-#SBATCH --mail-user=brvillas@ucsc.edu   # Where to send mail
-#SBATCH --ntasks=8                  # Number of MPI ranks
-#SBATCH --nodes=1                    # Number of nodes
-#SBATCH --ntasks-per-node=8         # How many tasks on each node
-#SBATCH --time=00:10:00              # Time limit hrs:min:sec
-#SBATCH --output=cuda_test_%j.log     # Standard output and error log
-
-pwd; hostname; date
-
-echo "Running program on $SLURM_JOB_NUM_NODES nodes with $SLURM_NTASKS total tasks, with each node getting $SLURM_NTASKS_PER_NODE running on cores."
-
-module load hdf5
-module load openmpi/4.0.1-cuda
-module load cuda10.1/10.1.168
-
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/brvillas/code/grackle/lib
-
-
-cd /home/brvillas/cholla
-
-mpirun -N 1 --map-by ppr:8:node ./cholla examples/3D/Spherical_Collapse.txt
-
-date
\ No newline at end of file
diff --git a/extras/submit_job_summit.lsf b/extras/submit_job_summit.lsf
deleted file mode 100644
index 5c2552c0a..000000000
--- a/extras/submit_job_summit.lsf
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-# Begin LSF Directives
-#BSUB -P AST149
-#BSUB -W 0:10
-#BSUB -nnodes 2
-#BSUB -J sphere_256
-#BSUB -o sphere_256.o%J
-#BSUB -e sphere_256.e%J
-#BSUB -alloc_flags "smt4"
-
-module load hdf5
-module load cuda
-
-export WORK_DIR=$MEMBERWORK/ast149/sphere_explosion
-
-cd $MEMBERWORK/ast149/cholla
-date
-#export OMP_NUM_THREADS=10
-jsrun -n 8 -a 1 -c 7 -g 1 -r 4 -l CPU-CPU -d packed -b packed:7 ./cholla examples/3D/Spherical_Overpressure.txt > $WORK_DIR/run_output.log |sort
diff --git a/python_scripts/README.md b/python_scripts/README.md
index 5a462e8c1..acda923b7 100644
--- a/python_scripts/README.md
+++ b/python_scripts/README.md
@@ -5,15 +5,8 @@ You will likely develop more customized, robust, and flexible scripts for your o
 These simple scripts here are intended to help you understand the basics of the generated data from Cholla.
 
 ## Merging HDF5 files
-Multi-processor runs generate HDF5 files per-timestep per-processor.
-To treat each timestep together we want to merge those per-processor HDF5 files.
 
-| Script | Concatenate |
-| ------ | ----------- |
-`cat_dset_3d.py`    | 3D HDF5 datasets
-`cat_projection.py` | The on-axis projection data created when the -DPROJECTION flag is turned on
-`cat_rotated_projection.py` | The rotated projection data created when the -DROTATED_PROJECTION flag is turned on
-`cat_slice.py` | The on-axis slice data created when the -DSLICES flag is turned on
+Multi-processor runs generate HDF5 files per-timestep per-processor. Merging these per process output into a single file can be done with the concatenation scripts detailed in the "Outputs" section of the wiki.
 
 ## Plotting data
 We here present simple Python matplotlib-based scripts to plot density, velocity, energy, and pressure.
diff --git a/python_scripts/cat_dset_3D.py b/python_scripts/cat_dset_3D.py
deleted file mode 100755
index 0c6d4b3ad..000000000
--- a/python_scripts/cat_dset_3D.py
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/usr/bin/env python3
-# Example file for concatenating 3D hdf5 datasets
-
-import h5py
-import numpy as np
-
-ns = 0
-ne = 0
-n_proc = 16 # number of processors that did the calculations
-istart = 0*n_proc
-iend = 1*n_proc
-dnamein = './hdf5/raw/'
-dnameout = './hdf5/'
-DE = 0
-
-# loop over outputs
-for n in range(ns, ne+1):
-  
-  # loop over files for a given output
-  for i in range(istart, iend):
-
-    # open the output file for writing (don't overwrite if exists)
-    fileout = h5py.File(dnameout+str(n)+'.h5', 'a')
-    # open the input file for reading
-    filein = h5py.File(dnamein+str(n)+'.h5.'+str(i), 'r')
-    # read in the header data from the input file
-    head = filein.attrs
-
-    # if it's the first input file, write the header attributes 
-    # and create the datasets in the output file
-    if (i == 0):
-      nx = head['dims'][0]
-      ny = head['dims'][1]
-      nz = head['dims'][2]
-      fileout.attrs['dims'] = [nx, ny, nz]
-      fileout.attrs['gamma'] = [head['gamma'][0]]
-      fileout.attrs['t'] = [head['t'][0]]
-      fileout.attrs['dt'] = [head['dt'][0]]
-      fileout.attrs['n_step'] = [head['n_step'][0]]
-
-      units = ['time_unit', 'mass_unit', 'length_unit', 'energy_unit', 'velocity_unit', 'density_unit']
-      for unit in units:
-        fileout.attrs[unit] = [head[unit][0]]
-
-      d  = fileout.create_dataset("density", (nx, ny, nz), chunks=True)
-      mx = fileout.create_dataset("momentum_x", (nx, ny, nz), chunks=True)
-      my = fileout.create_dataset("momentum_y", (nx, ny, nz), chunks=True)
-      mz = fileout.create_dataset("momentum_z", (nx, ny, nz), chunks=True)
-      E  = fileout.create_dataset("Energy", (nx, ny, nz), chunks=True)
-      if (DE):
-        GE = fileout.create_dataset("GasEnergy", (nx, ny, nz), chunks=True)
-
-    # write data from individual processor file to
-    # correct location in concatenated file
-    nxl = head['dims_local'][0]
-    nyl = head['dims_local'][1]
-    nzl = head['dims_local'][2]
-    xs = head['offset'][0]
-    ys = head['offset'][1]
-    zs = head['offset'][2]
-    fileout['density'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl]  = filein['density']
-    fileout['momentum_x'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_x']
-    fileout['momentum_y'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_y']
-    fileout['momentum_z'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_z']
-    fileout['Energy'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl]  = filein['Energy']
-    if (DE):
-      fileout['GasEnergy'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['GasEnergy']
-      
-    filein.close()
-
-  fileout.close()
diff --git a/python_scripts/cat_particles.py b/python_scripts/cat_particles.py
deleted file mode 100644
index 03cbcd71c..000000000
--- a/python_scripts/cat_particles.py
+++ /dev/null
@@ -1,91 +0,0 @@
-# Example file for concatenating particle data
-
-import h5py
-import numpy as np
-
-ns = 0
-ne = 300
-n_procs = 4 # number of processors that did the cholla calculation
-dnamein = '/gpfs/alpine/proj-shared/csc380/orlandow/o_cholla/out.21Sep20-Mon-12.49-356588-SOR_ONLY_PARTICLES_DISK/raw/'
-dnameout = '/gpfs/alpine/proj-shared/csc380/orlandow/o_cholla/out.21Sep20-Mon-12.49-356588-SOR_ONLY_PARTICLES_DISK/particles_cat/'
-
-# loop over the output times
-for n in range(ns, ne+1):
-
-  # open the output file for writing
-  fileout = h5py.File(dnameout+str(n)+'_particles.h5', 'w')
-
-  if (n % 10 == 0): print(str(n))
-
-  # loop over files for a given output time
-  for i in range(0, n_procs):
-
-    # open the input file for reading
-    filein = h5py.File(dnamein+str(n)+'_particles.h5.'+str(i), 'r')
-    # read in the header data from the input file
-    head = filein.attrs
-
-    # if it's the first input file, write the header attributes
-    # and create the datasets in the output file
-    if (i == 0):
-      gamma = head['gamma']
-      t = head['t']
-      dt = head['dt']
-      n_step = head['n_step']
-      nx = head['dims'][0]
-      ny = head['dims'][1]
-      nz = head['dims'][2]
-      fileout.attrs['gamma'] = gamma
-      fileout.attrs['t'] = t
-      fileout.attrs['dt'] = dt
-      fileout.attrs['n_step'] = n_step
-      fileout.attrs['dims'] = [nx, ny, nz]
-      fileout.attrs['velocity_unit'] = head['velocity_unit']
-      fileout.attrs['length_unit'] = head['length_unit']
-      fileout.attrs['particle_mass'] = head['particle_mass']
-      fileout.attrs['density_unit'] = head['density_unit']
-
-      x = np.array([])
-      y = np.array([])
-      z = np.array([])
-      vx = np.array([])
-      vy = np.array([])
-      vz = np.array([])
-      particle_ids = np.array([])
-      density = np.zeros((nx, ny, nz))
-      n_total_particles = 0
-
-
-    # write data from individual processor file to
-    # correct location in concatenated file
-    nxl = head['dims_local'][0]
-    nyl = head['dims_local'][1]
-    nzl = head['dims_local'][2]
-    xs = head['offset'][0]
-    ys = head['offset'][1]
-    zs = head['offset'][2]
-
-    n_total_particles += head['n_particles_local']
-    density[xs:xs+nxl, ys:ys+nyl, zs:zs+nzl] += filein['density']
-    x = np.append(x, filein['pos_x'])
-    y = np.append(y, filein['pos_y'])
-    z = np.append(z, filein['pos_z'])
-    vx = np.append(vx, filein['vel_x'])
-    vy = np.append(vy, filein['vel_y'])
-    vz = np.append(vz, filein['vel_z'])
-    particle_ids = np.append(particle_ids, filein['particle_IDs'])
-
-    filein.close()
-
-  # write out the new datasets
-  fileout.create_dataset('x', data=x)
-  fileout.create_dataset('y', data=y)
-  fileout.create_dataset('z', data=z)
-  fileout.create_dataset('vx', data=vx)
-  fileout.create_dataset('vy', data=vy)
-  fileout.create_dataset('vz', data=vz)
-  fileout.create_dataset('particle_ids', data=particle_ids)
-  fileout.create_dataset('density', data=density)
-  fileout.attrs['n_total_particles'] = n_total_particles
-
-  fileout.close()
diff --git a/python_scripts/cat_projection.py b/python_scripts/cat_projection.py
deleted file mode 100755
index 29b56a416..000000000
--- a/python_scripts/cat_projection.py
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/usr/bin/env python3
-# Example file for concatenating on-axis projection data
-# created when the -DPROJECTION flag is turned on
-
-import h5py
-import numpy as np
-
-ns = 0
-ne = 0
-n_procs = 16 # number of processors that did the cholla calculation
-dnamein = './hdf5/raw/'
-dnameout = './hdf5/'
-
-# loop over the output times
-for n in range(ns, ne+1):
-
-  # open the output file for writing
-  fileout = h5py.File(dnameout+str(n)+'_proj.h5', 'w')
-
-  # loop over files for a given output time
-  for i in range(0, n_procs):
-
-    # open the input file for reading
-    filein = h5py.File(dnamein+str(n)+'_proj.h5.'+str(i), 'r')
-    # read in the header data from the input file
-    head = filein.attrs
-
-    # if it's the first input file, write the header attributes
-    # and create the datasets in the output file
-    if (i == 0):
-      nx = head['dims'][0]
-      ny = head['dims'][1]
-      nz = head['dims'][2]
-      fileout.attrs['dims'] = [nx, ny, nz]
-      fileout.attrs['gamma'] = [head['gamma'][0]]
-      fileout.attrs['t'] = [head['t'][0]]
-      fileout.attrs['dt'] = [head['dt'][0]]
-      fileout.attrs['n_step'] = [head['n_step'][0]]
-
-      dxy = np.zeros((nx,ny))
-      dxz = np.zeros((nx,nz))
-      Txy = np.zeros((nx,ny))
-      Txz = np.zeros((nx,nz))
-
-    # write data from individual processor file to
-    # correct location in concatenated file
-    nxl = head['dims_local'][0]
-    nyl = head['dims_local'][1]
-    nzl = head['dims_local'][2]
-    xs = head['offset'][0]
-    ys = head['offset'][1]
-    zs = head['offset'][2]
-
-    dxy[xs:xs+nxl,ys:ys+nyl] += filein['d_xy']
-    dxz[xs:xs+nxl,zs:zs+nzl] += filein['d_xz']
-    Txy[xs:xs+nxl,ys:ys+nyl] += filein['T_xy']
-    Txz[xs:xs+nxl,zs:zs+nzl] += filein['T_xz']
-
-    filein.close()
-
-  # write out the new datasets
-  fileout.create_dataset('d_xy', data=dxy)
-  fileout.create_dataset('d_xz', data=dxz)
-  fileout.create_dataset('T_xy', data=Txy)
-  fileout.create_dataset('T_xz', data=Txz)
-
-  fileout.close()
diff --git a/python_scripts/cat_rotated_projection.py b/python_scripts/cat_rotated_projection.py
deleted file mode 100755
index 6e769ce55..000000000
--- a/python_scripts/cat_rotated_projection.py
+++ /dev/null
@@ -1,85 +0,0 @@
-#!/usr/bin/env python3
-# Example file for concatenating rotated projection data
-# created when the -DROTATED_PROJECTION flag is turned on
-
-import h5py
-import numpy as np
-
-ns = 0
-ne = 0
-n_procs = 16 # number of processors that did the cholla calculation
-dnamein = './hdf5/raw/'
-dnameout = './hdf5/'
-
-# loop over the output times
-for n in range(ns, ne+1):
-
-  # open the output file for writing
-  fileout = h5py.File(dnameout+str(n)+'_rot_proj.h5', 'w')
-
-  # loop over files for a given output time
-  for i in range(0, n_procs):
-
-    # open the input file for reading
-    filein = h5py.File(dnamein+str(n)+'_rot_proj.h5.'+str(i), 'r')
-    # read in the header data from the input file
-    head = filein.attrs
-
-    # if it's the first input file, write the header attributes
-    # and create the arrays to hold the output data
-    if (i == 0):
-      nxr = int(head['nxr'])
-      nzr = int(head['nzr'])
-      Lx = head['Lx']
-      Lz = head['Lz']
-      delta = head['delta']
-      theta = head['theta']
-      phi = head['phi']
-      gamma = head['gamma']
-      t = head['t']
-      dt = head['dt']
-      n_step = head['n_step']
-      fileout.attrs['nxr'] = nxr
-      fileout.attrs['nzr'] = nzr
-      fileout.attrs['Lx'] = Lx 
-      fileout.attrs['Lz'] = Lz 
-      fileout.attrs['delta'] = delta 
-      fileout.attrs['theta'] = theta 
-      fileout.attrs['phi'] = phi
-      fileout.attrs['gamma'] = gamma
-      fileout.attrs['t'] = t
-      fileout.attrs['dt'] = dt
-      fileout.attrs['n_step'] = n_step
-
-      d_xzr  = np.zeros((nxr, nzr))
-      vx_xzr = np.zeros((nxr, nzr))
-      vy_xzr = np.zeros((nxr, nzr))
-      vz_xzr = np.zeros((nxr, nzr))
-      T_xzr  = np.zeros((nxr, nzr))
-
-    # write data from individual processor file to
-    # correct location in concatenated file
-    nx_min = int(head['nx_min'])
-    nx_max = int(head['nx_max'])
-    nz_min = int(head['nz_min'])
-    nz_max = int(head['nz_max'])
-
-    d_xzr[nx_min:nx_max,nz_min:nz_max]  += filein['d_xzr'][:]
-    vx_xzr[nx_min:nx_max,nz_min:nz_max] += filein['vx_xzr'][:]
-    vy_xzr[nx_min:nx_max,nz_min:nz_max] += filein['vy_xzr'][:]
-    vz_xzr[nx_min:nx_max,nz_min:nz_max] += filein['vz_xzr'][:]
-    T_xzr[nx_min:nx_max,nz_min:nz_max]  += filein['T_xzr'][:]
-
-    filein.close()
-
-  # write out the new datasets
-  fileout.create_dataset("d_xzr", data=d_xzr)
-  fileout.create_dataset("vx_xzr", data=vx_xzr)
-  fileout.create_dataset("vy_xzr", data=vy_xzr)
-  fileout.create_dataset("vz_xzr", data=vz_xzr)
-  fileout.create_dataset("T_xzr", data=T_xzr)
-
-  fileout.close()
- 
-
-
diff --git a/python_scripts/cat_slice.py b/python_scripts/cat_slice.py
deleted file mode 100644
index 7b6d15e12..000000000
--- a/python_scripts/cat_slice.py
+++ /dev/null
@@ -1,130 +0,0 @@
-# Example file for concatenating on-axis slice data
-# created when the -DSLICES flag is turned on
-
-import h5py
-import numpy as np
-
-ns = 0
-ne = 2
-n_procs = 4 # number of processors that did the cholla calculation
-dnamein = '/gpfs/alpine/proj-shared/csc380/orlandow/o_cholla/out.21Sep20-Mon-14.17-357075-SOR_HYDRO_DISK/raw/'
-dnameout = '/gpfs/alpine/proj-shared/csc380/orlandow/o_cholla/out.21Sep20-Mon-14.17-357075-SOR_HYDRO_DISK/catted_files'
-
-DE = True # set to True if Dual Energy flag was used
-SCALAR = False # set to True if Scalar was used
-
-# loop over the output times
-for n in range(ns, ne+1):
-
-  # open the output file for writing
-  fileout = h5py.File(dnameout+str(n)+'_slice.h5', 'w')
-
-  # loop over files for a given output time
-  for i in range(0, n_procs):
-
-    # open the input file for reading
-    filein = h5py.File(dnamein+str(n)+'_slice.h5.'+str(i), 'r')
-    # read in the header data from the input file
-    head = filein.attrs
-
-    # if it's the first input file, write the header attributes
-    # and create the datasets in the output file
-    if (i == 0):
-      gamma = head['gamma']
-      t = head['t']
-      dt = head['dt']
-      n_step = head['n_step']
-      nx = head['dims'][0]
-      ny = head['dims'][1]
-      nz = head['dims'][2]
-      fileout.attrs['gamma'] = gamma
-      fileout.attrs['t'] = t
-      fileout.attrs['dt'] = dt
-      fileout.attrs['n_step'] = n_step
-      fileout.attrs['dims'] = [nx, ny, nz]
-
-      d_xy = np.zeros((nx,ny))
-      d_xz = np.zeros((nx,nz))
-      d_yz = np.zeros((ny,nz))
-      mx_xy = np.zeros((nx,ny))
-      mx_xz = np.zeros((nx,nz))
-      mx_yz = np.zeros((ny,nz))
-      my_xy = np.zeros((nx,ny))
-      my_xz = np.zeros((nx,nz))
-      my_yz = np.zeros((ny,nz))
-      mz_xy = np.zeros((nx,ny))
-      mz_xz = np.zeros((nx,nz))
-      mz_yz = np.zeros((ny,nz))
-      E_xy = np.zeros((nx,ny))
-      E_xz = np.zeros((nx,nz))
-      E_yz = np.zeros((ny,nz))
-      if DE:
-       GE_xy = np.zeros((nx,ny))
-       GE_xz = np.zeros((nx,nz))
-       GE_yz = np.zeros((ny,nz))
-      if SCALAR:
-       scalar_xy = np.zeros((nx,ny))
-       scalar_xz = np.zeros((nx,nz))
-       scalar_yz = np.zeros((ny,nz))
-
-    # write data from individual processor file to
-    # correct location in concatenated file
-    nxl = head['dims_local'][0]
-    nyl = head['dims_local'][1]
-    nzl = head['dims_local'][2]
-    xs = head['offset'][0]
-    ys = head['offset'][1]
-    zs = head['offset'][2]
-
-    d_xy[xs:xs+nxl,ys:ys+nyl] += filein['d_xy']
-    d_xz[xs:xs+nxl,zs:zs+nzl] += filein['d_xz']
-    d_yz[ys:ys+nyl,zs:zs+nzl] += filein['d_yz']
-    mx_xy[xs:xs+nxl,ys:ys+nyl] += filein['mx_xy']
-    mx_xz[xs:xs+nxl,zs:zs+nzl] += filein['mx_xz']
-    mx_yz[ys:ys+nyl,zs:zs+nzl] += filein['mx_yz']
-    my_xy[xs:xs+nxl,ys:ys+nyl] += filein['my_xy']
-    my_xz[xs:xs+nxl,zs:zs+nzl] += filein['my_xz']
-    my_yz[ys:ys+nyl,zs:zs+nzl] += filein['my_yz']
-    mz_xy[xs:xs+nxl,ys:ys+nyl] += filein['mz_xy']
-    mz_xz[xs:xs+nxl,zs:zs+nzl] += filein['mz_xz']
-    mz_yz[ys:ys+nyl,zs:zs+nzl] += filein['mz_yz']
-    E_xy[xs:xs+nxl,ys:ys+nyl] += filein['E_xy']
-    E_xz[xs:xs+nxl,zs:zs+nzl] += filein['E_xz']
-    E_yz[ys:ys+nyl,zs:zs+nzl] += filein['E_yz']
-    if DE:
-      GE_xy[xs:xs+nxl,ys:ys+nyl] += filein['GE_xy']
-      GE_xz[xs:xs+nxl,zs:zs+nzl] += filein['GE_xz']
-      GE_yz[ys:ys+nyl,zs:zs+nzl] += filein['GE_yz']
-    if SCALAR:
-      scalar_xy[xs:xs+nxl,ys:ys+nyl] += filein['scalar_xy']
-      scalar_xz[xs:xs+nxl,zs:zs+nzl] += filein['scalar_xz']
-      scalar_yz[ys:ys+nyl,zs:zs+nzl] += filein['scalar_yz']
-
-    filein.close()
-
-  # wrte out the new datasets
-  fileout.create_dataset('d_xy', data=d_xy)
-  fileout.create_dataset('d_xz', data=d_xz)
-  fileout.create_dataset('d_yz', data=d_yz)
-  fileout.create_dataset('mx_xy', data=mx_xy)
-  fileout.create_dataset('mx_xz', data=mx_xz)
-  fileout.create_dataset('mx_yz', data=mx_yz)
-  fileout.create_dataset('my_xy', data=my_xy)
-  fileout.create_dataset('my_xz', data=my_xz)
-  fileout.create_dataset('my_yz', data=my_yz)
-  fileout.create_dataset('mz_xy', data=mz_xy)
-  fileout.create_dataset('mz_xz', data=mz_xz)
-  fileout.create_dataset('mz_yz', data=mz_yz)
-  fileout.create_dataset('E_xy', data=E_xy)
-  fileout.create_dataset('E_xz', data=E_xz)
-  fileout.create_dataset('E_yz', data=E_yz)
-  if DE:
-    fileout.create_dataset('GE_xy', data=GE_xy)
-    fileout.create_dataset('GE_xz', data=GE_xz)
-    fileout.create_dataset('GE_yz', data=GE_yz)
-  if SCALAR:
-    fileout.create_dataset('scalar_xy', data=scalar_xy)
-    fileout.create_dataset('scalar_xz', data=scalar_xz)
-    fileout.create_dataset('scalar_yz', data=scalar_yz)
-
-  fileout.close()
diff --git a/python_scripts/concat_2d_data.py b/python_scripts/concat_2d_data.py
new file mode 100755
index 000000000..9c4e0dd86
--- /dev/null
+++ b/python_scripts/concat_2d_data.py
@@ -0,0 +1,271 @@
+#!/usr/bin/env python3
+"""
+Python script for concatenating 2D hdf5 datasets for when -DSLICES,
+-DPROJECTION, or -DROTATED_PROJECTION is turned on in Cholla. Includes a CLI for
+concatenating Cholla HDF5 datasets and can be imported into other scripts where
+the `concat_2d_dataset` function can be used to concatenate the HDF5 files.
+
+Generally the easiest way to import this script is to add the `python_scripts`
+directory to your python path in your script like this:
+```
+import sys
+sys.path.append('/PATH/TO/CHOLLA/python_scripts')
+import concat_2d_data
+```
+"""
+
+import h5py
+import pathlib
+import numpy as np
+
+import concat_internals
+
+# ==============================================================================
+def concat_2d_dataset(output_directory: pathlib.Path,
+                      num_processes: int,
+                      output_number: int,
+                      dataset_kind: str,
+                      build_source_path,
+                      concat_xy: bool = True,
+                      concat_yz: bool = True,
+                      concat_xz: bool = True,
+                      skip_fields: list = [],
+                      destination_dtype: np.dtype = None,
+                      compression_type: str = None,
+                      compression_options: str = None,
+                      chunking = None) -> None:
+  """Concatenate 2D HDF5 Cholla datasets. i.e. take the single files
+    generated per process and concatenate them into a single, large file. This
+    function concatenates a single output time and can be called multiple times,
+    potentially in parallel, to concatenate multiple output times.
+
+  Parameters
+  ----------
+  output_directory : pathlib.Path
+      The directory containing the new concatenated files
+  num_processes : int
+      The number of ranks that Cholla was run with
+  output_number : int
+      The output number to concatenate
+  dataset_kind : str
+      The type of 2D dataset to concatenate. Can be 'slice', 'proj', or 'rot_proj'.
+  build_source_path : callable
+      A function used to construct the paths to the files that are to be concatenated.
+  concat_xy : bool
+      If True then concatenate the XY slices/projections. Defaults to True.
+  concat_yz : bool
+      If True then concatenate the YZ slices/projections. Defaults to True.
+  concat_xz : bool
+      If True then concatenate the XZ slices/projections. Defaults to True.
+  skip_fields : list
+      List of fields to skip concatenating. Defaults to [].
+  destination_dtype : np.dtype
+      The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets.
+  compression_type : str
+      What kind of compression to use on the output data. Defaults to None.
+  compression_options : str
+      What compression settings to use if compressing. Defaults to None.
+  chunking : bool or tuple
+      Whether or not to use chunking and the chunk size. Defaults to None.
+  output_directory: pathlib.Path :
+
+  num_processes: int :
+
+  output_number: int :
+
+  dataset_kind: str :
+
+  concat_xy: bool :
+        (Default value = True)
+  concat_yz: bool :
+        (Default value = True)
+  concat_xz: bool :
+        (Default value = True)
+  skip_fields: list :
+        (Default value = [])
+  destination_dtype: np.dtype :
+        (Default value = None)
+  compression_type: str :
+        (Default value = None)
+  compression_options: str :
+        (Default value = None)
+
+  Returns
+  -------
+
+  """
+
+  # Error checking
+  assert num_processes > 1, 'num_processes must be greater than 1'
+  assert output_number >= 0, 'output_number must be greater than or equal to 0'
+  assert dataset_kind in ['slice', 'proj', 'rot_proj'], '`dataset_kind` can only be one of "slice", "proj", "rot_proj".'
+
+  # Open destination file
+  destination_file = concat_internals.destination_safe_open(output_directory / f'{output_number}_{dataset_kind}.h5')
+
+  # Setup the destination file
+  with h5py.File(build_source_path(proc_id = 0, nfile = output_number), 'r') as source_file:
+    # Copy over header
+    destination_file = concat_internals.copy_header(source_file, destination_file)
+
+    # Get a list of all datasets in the source file
+    datasets_to_copy = list(source_file.keys())
+
+    # Filter the datasets to only include those that need to be copied
+    if not concat_xy:
+      datasets_to_copy = [dataset for dataset in datasets_to_copy if not 'xy' in dataset]
+    if not concat_yz:
+      datasets_to_copy = [dataset for dataset in datasets_to_copy if not 'yz' in dataset]
+    if not concat_xz:
+      datasets_to_copy = [dataset for dataset in datasets_to_copy if not 'xz' in dataset]
+    datasets_to_copy = [dataset for dataset in datasets_to_copy if not dataset in skip_fields]
+
+    # Create the datasets in the destination file
+    zero_array = np.zeros(1)
+    for dataset in datasets_to_copy:
+      dtype = source_file[dataset].dtype if (destination_dtype == None) else destination_dtype
+
+      dataset_shape = __get_2d_dataset_shape(source_file, dataset)
+
+      # Create array to initialize data to zero, this is required for projections
+      if zero_array.shape != dataset_shape:
+        zero_array = np.zeros(dataset_shape)
+
+      destination_file.create_dataset(name=dataset,
+                                      shape=dataset_shape,
+                                      data=zero_array,
+                                      dtype=dtype,
+                                      chunks=chunking,
+                                      compression=compression_type,
+                                      compression_opts=compression_options)
+
+  # Copy data
+  for rank in range(num_processes):
+    # Open source file
+    source_file = h5py.File(build_source_path(proc_id = rank, nfile = output_number), 'r')
+
+    # Loop through and copy datasets
+    for dataset in datasets_to_copy:
+      # Determine locations and shifts for writing
+      (i0_start, i0_end, i1_start, i1_end), file_in_slice = __write_bounds_2d_dataset(source_file, dataset)
+
+      # If this is a slice dataset we can skip loading the source file if that
+      # file isn't in the slice
+      if dataset_kind == 'slice' and not file_in_slice:
+        continue
+
+      # Copy the data, the summation is required for projections but not slices
+      destination_file[dataset][i0_start:i0_end,
+                                i1_start:i1_end] += source_file[dataset]
+
+    # Now that the copy is done we close the source file
+    source_file.close()
+
+  # Close destination file now that it is fully constructed
+  destination_file.close()
+# ==============================================================================
+
+# ==============================================================================
+def __get_2d_dataset_shape(source_file: h5py.File, dataset: str) -> tuple:
+  """Determine the shape of the full 2D dataset
+
+  Args:
+      source_file (h5py.File): The source file the get the shape information from
+      dataset (str): The dataset to get the shape of
+
+  Raises:
+      ValueError: If the dataset name isn't a 2D dataset name
+
+  Returns:
+      tuple: The dimensions of the dataset
+  """
+
+  if 'xzr' in dataset:
+    return (source_file.attrs['nxr'][0], source_file.attrs['nzr'][0])
+
+  nx, ny, nz = source_file.attrs['dims']
+  if 'xy' in dataset:
+    dimensions = (nx, ny)
+  elif 'yz' in dataset:
+    dimensions = (ny, nz)
+  elif 'xz' in dataset:
+    dimensions = (nx, nz)
+  else:
+    raise ValueError(f'Dataset "{dataset}" is not a slice.')
+
+  return dimensions
+# ==============================================================================
+
+# ==============================================================================
+def __write_bounds_2d_dataset(source_file: h5py.File, dataset: str) -> tuple:
+  """Determine the bounds of the concatenated file to write to
+
+  Args:
+      source_file (h5py.File): The source file to read from
+      dataset (str): The name of the dataset to read from the source file
+
+  Raises:
+      ValueError: If the dataset name isn't a 2D dataset name
+
+  Returns:
+      tuple: The write bounds for the concatenated file to be used like
+      `output_file[dataset][return[0]:return[1], return[2]:return[3]]` followed by a bool to indicate if the file is
+      in the slice if concatenating a slice
+  """
+
+  if 'xzr' in dataset:
+    return (source_file.attrs['nx_min'][0], source_file.attrs['nx_max'][0],
+            source_file.attrs['nz_min'][0], source_file.attrs['nz_max'][0]), True
+
+  nx, ny, nz                   = source_file.attrs['dims']
+  nx_local, ny_local, nz_local = source_file.attrs['dims_local']
+  x_start, y_start, z_start    = source_file.attrs['offset']
+
+  if 'xy' in dataset:
+    file_in_slice = z_start <= nz//2 <= z_start+nz_local
+    bounds = (x_start, x_start+nx_local, y_start, y_start+ny_local)
+  elif 'yz' in dataset:
+    file_in_slice = x_start <= nx//2 <= x_start+nx_local
+    bounds = (y_start, y_start+ny_local, z_start, z_start+nz_local)
+  elif 'xz' in dataset:
+    file_in_slice = y_start <= ny//2 <= y_start+ny_local
+    bounds = (x_start, x_start+nx_local, z_start, z_start+nz_local)
+  else:
+    raise ValueError(f'Dataset "{dataset}" is not a slice or projection.')
+
+  return bounds, file_in_slice
+# ==============================================================================
+
+if __name__ == '__main__':
+  from timeit import default_timer
+  start = default_timer()
+
+  cli = concat_internals.common_cli()
+  cli.add_argument('-d', '--dataset-kind', type=str, required=True,    help='What kind of 2D dataset to concatnate. Options are "slice", "proj", and "rot_proj"')
+  cli.add_argument('--disable-xy', default=True, action='store_false', help='Disables concating the XY datasets.')
+  cli.add_argument('--disable-yz', default=True, action='store_false', help='Disables concating the YZ datasets.')
+  cli.add_argument('--disable-xz', default=True, action='store_false', help='Disables concating the XZ datasets.')
+  args = cli.parse_args()
+
+  build_source_path = concat_internals.get_source_path_builder(
+    source_directory = args.source_directory,
+    pre_extension_suffix = f'_{args.dataset_kind}',
+    known_output_snap = args.concat_outputs[0])
+
+  # Perform the concatenation
+  for output in args.concat_outputs:
+    concat_2d_dataset(output_directory=args.output_directory,
+                      num_processes=args.num_processes,
+                      output_number=output,
+                      dataset_kind=args.dataset_kind,
+                      build_source_path = build_source_path,
+                      concat_xy=args.disable_xy,
+                      concat_yz=args.disable_yz,
+                      concat_xz=args.disable_xz,
+                      skip_fields=args.skip_fields,
+                      destination_dtype=args.dtype,
+                      compression_type=args.compression_type,
+                      compression_options=args.compression_opts,
+                      chunking=args.chunking)
+
+  print(f'\nTime to execute: {round(default_timer()-start,2)} seconds')
diff --git a/python_scripts/concat_3d_data.py b/python_scripts/concat_3d_data.py
new file mode 100755
index 000000000..1d5ba8228
--- /dev/null
+++ b/python_scripts/concat_3d_data.py
@@ -0,0 +1,158 @@
+#!/usr/bin/env python3
+"""
+Python script for concatenating 3D hdf5 datasets. Includes a CLI for concatenating Cholla HDF5 datasets and can be
+imported into other scripts where the `concat_3d_dataset` function can be used to concatenate the datasets.
+
+Generally the easiest way to import this script is to add the `python_scripts` directory to your python path in your
+script like this:
+```
+import sys
+sys.path.append('/PATH/TO/CHOLLA/python_scripts')
+import concat_3d_data
+```
+"""
+
+import h5py
+import numpy as np
+import pathlib
+
+import concat_internals
+
+# ==============================================================================
+def concat_3d_dataset(output_directory: pathlib.Path,
+                      num_processes: int,
+                      output_number: int,
+                      build_source_path,
+                      skip_fields: list = [],
+                      destination_dtype: np.dtype = None,
+                      compression_type: str = None,
+                      compression_options: str = None,
+                      chunking = None) -> None:
+  """Concatenate a single 3D HDF5 Cholla dataset. i.e. take the single files
+  generated per process and concatenate them into a single, large file.
+
+  Parameters
+  ----------
+  output_directory : pathlib.Path
+      The directory containing the new concatenated files
+  num_processes : int
+      The number of ranks that Cholla was run with
+  output_number : int
+      The output number to concatenate
+  skip_fields : list
+      List of fields to skip concatenating. Defaults to [].
+  build_source_path : callable
+      A function used to construct the paths to the files that are to be concatenated.
+  destination_dtype : np.dtype
+      The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets.
+  compression_type : str
+      What kind of compression to use on the output data. Defaults to None.
+  compression_options : str
+      What compression settings to use if compressing. Defaults to None.
+  chunking : bool or tuple
+      Whether or not to use chunking and the chunk size. Defaults to None.
+  output_directory: pathlib.Path :
+
+  num_processes: int :
+
+  output_number: int :
+
+  skip_fields: list :
+        (Default value = [])
+  destination_dtype: np.dtype :
+        (Default value = None)
+  compression_type: str :
+        (Default value = None)
+  compression_options: str :
+        (Default value = None)
+
+  Returns
+  -------
+
+  """
+
+  # Error checking
+  assert num_processes > 1, 'num_processes must be greater than 1'
+  assert output_number >= 0, 'output_number must be greater than or equal to 0'
+
+  # Open the output file for writing
+  destination_file = concat_internals.destination_safe_open(output_directory / f'{output_number}.h5')
+
+  # Setup the output file
+  with h5py.File(build_source_path(proc_id = 0, nfile = output_number), 'r') as source_file:
+    # Copy header data
+    destination_file = concat_internals.copy_header(source_file, destination_file)
+
+    # Create the datasets in the output file
+    datasets_to_copy = list(source_file.keys())
+    datasets_to_copy = [dataset for dataset in datasets_to_copy if not dataset in skip_fields]
+
+    for dataset in datasets_to_copy:
+      dtype = source_file[dataset].dtype if (destination_dtype == None) else destination_dtype
+
+      data_shape = source_file.attrs['dims']
+
+      if dataset == 'magnetic_x': data_shape[0] += 1
+      if dataset == 'magnetic_y': data_shape[1] += 1
+      if dataset == 'magnetic_z': data_shape[2] += 1
+
+      destination_file.create_dataset(name=dataset,
+                                      shape=data_shape,
+                                      dtype=dtype,
+                                      chunks=chunking,
+                                      compression=compression_type,
+                                      compression_opts=compression_options)
+
+  # loop over files for a given output
+  for i in range(0, num_processes):
+    # open the input file for reading
+    source_file = h5py.File(build_source_path(proc_id = i, nfile = output_number), 'r')
+
+    # Compute the offset slicing
+    nx_local, ny_local, nz_local = source_file.attrs['dims_local']
+    x_start, y_start, z_start    = source_file.attrs['offset']
+    x_end, y_end, z_end          = x_start+nx_local, y_start+ny_local, z_start+nz_local
+
+    # write data from individual processor file to correct location in concatenated file
+    for dataset in datasets_to_copy:
+      magnetic_offset = [0,0,0]
+      if dataset == 'magnetic_x': magnetic_offset[0] = 1
+      if dataset == 'magnetic_y': magnetic_offset[1] = 1
+      if dataset == 'magnetic_z': magnetic_offset[2] = 1
+
+      destination_file[dataset][x_start:x_end+magnetic_offset[0],
+                                y_start:y_end+magnetic_offset[1],
+                                z_start:z_end+magnetic_offset[2]] = source_file[dataset]
+
+    # Now that the copy is done we close the source file
+    source_file.close()
+
+  # Close destination file now that it is fully constructed
+  destination_file.close()
+# ==============================================================================
+
+if __name__ == '__main__':
+  from timeit import default_timer
+  start = default_timer()
+
+  cli = concat_internals.common_cli()
+  args = cli.parse_args()
+
+  build_source_path = concat_internals.get_source_path_builder(
+    source_directory = args.source_directory,
+    pre_extension_suffix = '',
+    known_output_snap = args.concat_outputs[0])
+
+  # Perform the concatenation
+  for output in args.concat_outputs:
+    concat_3d_dataset(output_directory=args.output_directory,
+                      num_processes=args.num_processes,
+                      output_number=output,
+                      build_source_path = build_source_path,
+                      skip_fields=args.skip_fields,
+                      destination_dtype=args.dtype,
+                      compression_type=args.compression_type,
+                      compression_options=args.compression_opts,
+                      chunking=args.chunking)
+
+  print(f'\nTime to execute: {round(default_timer()-start,2)} seconds')
diff --git a/python_scripts/concat_internals.py b/python_scripts/concat_internals.py
new file mode 100755
index 000000000..bc615012e
--- /dev/null
+++ b/python_scripts/concat_internals.py
@@ -0,0 +1,220 @@
+#!/usr/bin/env python3
+"""
+Contains all the common tools for the various concatnation functions/scipts
+"""
+
+import h5py
+import argparse
+import functools
+import pathlib
+
+# ==============================================================================
+def destination_safe_open(filename: pathlib.Path) -> h5py.File:
+  """Opens a HDF5 file safely and provides useful error messages for some common failure modes
+
+  Parameters
+  ----------
+  filename : pathlib.Path
+
+  The full path and name of the file to open :
+
+  filename: pathlib.Path :
+
+
+  Returns
+  -------
+  h5py.File
+
+  The opened HDF5 file object
+  """
+
+  try:
+    destination_file = h5py.File(filename, 'w-')
+  except FileExistsError:
+    # It might be better for this to simply print the error message and return
+    # rather than exiting. That way if a single call fails in a parallel
+    # environment it doesn't take down the entire job
+    raise FileExistsError(f'File "{filename}" already exists and will not be overwritten, skipping.')
+
+  return destination_file
+# ==============================================================================
+
+# ==============================================================================
+def copy_header(source_file: h5py.File, destination_file: h5py.File) -> h5py.File:
+  """Copy the attributes of one HDF5 file to another, skipping all fields that are specific to an individual rank
+
+  Parameters
+  ----------
+  source_file : h5py.File
+      The source file
+  destination_file : h5py.File
+      The destination file
+  source_file: h5py.File :
+
+  destination_file: h5py.File :
+
+
+  Returns
+  -------
+  h5py.File
+      The destination file with the new header attributes
+  """
+  fields_to_skip = ['dims_local', 'offset', 'n_particles_local']
+
+  for attr_key in source_file.attrs.keys():
+    if attr_key not in fields_to_skip:
+      destination_file.attrs[attr_key] = source_file.attrs[attr_key]
+
+  return destination_file
+# ==============================================================================
+
+# ==============================================================================
+def common_cli() -> argparse.ArgumentParser:
+  """This function provides the basis for the common CLI amongst the various concatenation scripts. It returns an
+    `argparse.ArgumentParser` object to which additional arguments can be passed before the final `.parse_args()` method
+    is used.
+
+  Parameters
+  ----------
+
+  Returns
+  -------
+  argparse.ArgumentParser
+    The common components of the CLI for the concatenation scripts
+  """
+
+  # ============================================================================
+  def concat_output(raw_argument: str) -> list:
+    """Function used to parse the `--concat-output` argument
+    """
+    # Check if the string is empty
+    if len(raw_argument) < 1:
+      raise ValueError('The --concat-output argument must not be of length zero.')
+
+    # Strip unneeded characters
+    cleaned_argument = raw_argument.replace(' ', '')
+    cleaned_argument = cleaned_argument.replace('[', '')
+    cleaned_argument = cleaned_argument.replace(']', '')
+
+    # Check that it only has the allowed characters
+    allowed_charaters = set('0123456789,-')
+    if not set(cleaned_argument).issubset(allowed_charaters):
+      raise ValueError("Argument contains incorrect characters. Should only contain '0-9', ',', and '-'.")
+
+    # Split on commas
+    cleaned_argument = cleaned_argument.split(',')
+
+    # Generate the final list
+    iterable_argument = set()
+    for arg in cleaned_argument:
+      if '-' not in arg:
+        if int(arg) < 0:
+          raise ValueError()
+        iterable_argument.add(int(arg))
+      else:
+        start, end = arg.split('-')
+        start, end = int(start), int(end)
+        if end < start:
+          raise ValueError('The end of a range must be larger than the start of the range.')
+        if start < 0:
+          raise ValueError()
+        iterable_argument = iterable_argument.union(set(range(start, end+1)))
+
+    return list(iterable_argument)
+  # ============================================================================
+
+  # ============================================================================
+  def positive_int(raw_argument: str) -> int:
+    arg = int(raw_argument)
+    if arg < 0:
+      raise ValueError('Argument must be 0 or greater.')
+
+    return arg
+  # ============================================================================
+
+  # ============================================================================
+  def skip_fields(raw_argument: str) -> list:
+    # Strip unneeded characters
+    cleaned_argument = raw_argument.replace(' ', '')
+    cleaned_argument = cleaned_argument.replace('[', '')
+    cleaned_argument = cleaned_argument.replace(']', '')
+    cleaned_argument = cleaned_argument.split(',')
+
+    return cleaned_argument
+  # ============================================================================
+
+  # ============================================================================
+  def chunk_arg(raw_argument: str) -> tuple:
+    # Strip unneeded characters
+    cleaned_argument = raw_argument.replace(' ', '')
+    cleaned_argument = cleaned_argument.replace('(', '')
+    cleaned_argument = cleaned_argument.replace(')', '')
+
+    # Check that it only has the allowed characters
+    allowed_charaters = set('0123456789,')
+    if not set(cleaned_argument).issubset(allowed_charaters):
+      raise ValueError("Argument contains incorrect characters. Should only contain '0-9', ',', and '-'.")
+
+    # Convert to a tuple and return
+    return tuple([int(i) for i in cleaned_argument.split(',')])
+  # ============================================================================
+
+  # Initialize the CLI
+  cli = argparse.ArgumentParser()
+
+  # Required Arguments
+  cli.add_argument('-s', '--source-directory', type=pathlib.Path,  required=True, help='The path to the directory for the source HDF5 files.')
+  cli.add_argument('-o', '--output-directory', type=pathlib.Path,  required=True, help='The path to the directory to write out the concatenated HDF5 files.')
+  cli.add_argument('-n', '--num-processes',    type=positive_int,  required=True, help='The number of processes that were used')
+  cli.add_argument('-c', '--concat-outputs',   type=concat_output, required=True, help='Which outputs to concatenate. Can be a single number (e.g. 8), a range (e.g. 2-9), or a list (e.g. [1,2,3]). Ranges are inclusive')
+
+  # Optional Arguments
+  cli.add_argument('--skip-fields',            type=skip_fields,   default=[],   help='List of fields to skip concatenating. Defaults to empty.')
+  cli.add_argument('--dtype',                  type=str,           default=None, help='The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets.')
+  cli.add_argument('--compression-type',       type=str,           default=None, help='What kind of compression to use on the output data. Defaults to None.')
+  cli.add_argument('--compression-opts',       type=str,           default=None, help='What compression settings to use if compressing. Defaults to None.')
+  cli.add_argument('--chunking',               type=chunk_arg,     default=None, nargs='?', const=True, help='Enable chunking of the output file. Default is `False`. If set without an argument then the chunk size will be automatically chosen or a tuple can be passed to indicate the chunk size desired.')
+
+  return cli
+# ==============================================================================
+
+def _get_source_path(proc_id : int, source_directory : pathlib.Path,
+                     pre_extension_suffix : str, nfile : int, new_style : bool,
+                     extension : str = '.h5'):
+  dirname = str(source_directory)
+  if new_style:
+    out = f"{dirname}/{nfile}/{nfile}{pre_extension_suffix}{extension}.{proc_id}"
+  else:
+    # in principle, when source_directory isn't an empty string and it doesn't end
+    # end in a '/', part of it should act like a filename prefix
+    # -> with that said, the concatenation scripts have not supported this behavior
+    #    since we've made use of pathlib.Path
+    out = f"{dirname}/{nfile}{pre_extension_suffix}{extension}.{proc_id}"
+  return pathlib.Path(out)
+
+def get_source_path_builder(source_directory : pathlib.Path,
+                            pre_extension_suffix : str,
+                            known_output_snap : int):
+  """
+  Source files (that are to be concatenated) have one of 2 formats. This identifies
+  the format in use and returns a function appropriate for building the pathnames
+
+  This function auto-detect the format and returns a function to construct paths to these
+  files
+  """
+
+  # try newer format first:
+  common_kw = {'source_directory' : source_directory, 'extension' : '.h5',
+               'pre_extension_suffix' : pre_extension_suffix}
+  new_style_path = _get_source_path(proc_id = 0, nfile = known_output_snap,
+                                    new_style = True, **common_kw)
+  old_style_path = _get_source_path(proc_id = 0, nfile = known_output_snap,
+                                    new_style = False, **common_kw)
+  if new_style_path.is_file():
+    return functools.partial(_get_source_path, new_style = True, **common_kw)
+  elif old_style_path.is_file():
+    return functools.partial(_get_source_path, new_style = False, **common_kw)
+  raise RuntimeError(
+    "Could not find any files to concatenate. We searched "
+    f"{new_style_path!s} and {old_style_path!s}"
+  )
\ No newline at end of file
diff --git a/python_scripts/concat_particles.py b/python_scripts/concat_particles.py
new file mode 100755
index 000000000..89bb3bc1a
--- /dev/null
+++ b/python_scripts/concat_particles.py
@@ -0,0 +1,253 @@
+#!/usr/bin/env python3
+"""
+Python script for concatenating particle hdf5 datasets. Includes a CLI for concatenating Cholla HDF5 datasets and can be
+imported into other scripts where the `concat_particles_dataset` function can be used to concatenate the datasets.
+
+Generally the easiest way to import this script is to add the `python_scripts` directory to your python path in your
+script like this:
+```
+import sys
+sys.path.append('/PATH/TO/CHOLLA/python_scripts')
+import concat_particles
+```
+"""
+
+import h5py
+import numpy as np
+import pathlib
+
+import concat_internals
+
+# ======================================================================================================================
+def concat_particles_dataset(output_directory: pathlib.Path,
+                             num_processes: int,
+                             output_number: int,
+                             build_source_path,
+                             skip_fields: list = [],
+                             destination_dtype: np.dtype = None,
+                             compression_type: str = None,
+                             compression_options: str = None,
+                             chunking = None) -> None:
+  """Concatenate a single particle HDF5 Cholla dataset. i.e. take the single
+  files generated per process and concatenate them into a single, large file.
+
+  Parameters
+  ----------
+  output_directory : pathlib.Path
+      The directory containing the new concatenated files
+  num_processes : int
+      The number of ranks that Cholla was run with
+  output_number : int
+      The output number to concatenate
+  build_source_path : callable
+      A function used to construct the paths to the files that are to be concatenated.
+  skip_fields : list
+      List of fields to skip concatenating. Defaults to [].
+  destination_dtype : np.dtype
+      The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets.
+  compression_type : str
+      What kind of compression to use on the output data. Defaults to None.
+  compression_options : str
+      What compression settings to use if compressing. Defaults to None.
+  chunking : bool or tuple
+      Whether or not to use chunking and the chunk size. Defaults to None.
+  output_directory: pathlib.Path :
+
+  num_processes: int :
+
+  output_number: int :
+
+  skip_fields: list :
+        (Default value = [])
+  destination_dtype: np.dtype :
+        (Default value = None)
+  compression_type: str :
+        (Default value = None)
+  compression_options: str :
+        (Default value = None)
+
+  Returns
+  -------
+
+  """
+
+  # Error checking
+  assert num_processes > 1, 'num_processes must be greater than 1'
+  assert output_number >= 0, 'output_number must be greater than or equal to 0'
+
+  # Open the output file for writing
+  destination_file = concat_internals.destination_safe_open(output_directory / f'{output_number}_particles.h5')
+
+  # Setup the output file
+  # Note that the call to `__get_num_particles` is potentially expensive as it
+  # opens every single file to read the number of particles in that file
+  num_particles    = __get_num_particles(build_source_path, num_processes, output_number)
+  destination_file = __setup_destination_file(build_source_path,
+                                              destination_file,
+                                              output_number,
+                                              num_particles,
+                                              skip_fields,
+                                              destination_dtype,
+                                              compression_type,
+                                              compression_options,
+                                              chunking)
+
+  # loop over files for a given output
+  particles_offset = 0
+  for i in range(0, num_processes):
+    # open the input file for reading
+    source_file = h5py.File(build_source_path(proc_id = i, nfile = output_number), 'r')
+
+    # Compute the offset slicing for the 3D data
+    nx_local, ny_local, nz_local = source_file.attrs['dims_local']
+    x_start, y_start, z_start    = source_file.attrs['offset']
+    x_end, y_end, z_end          = x_start+nx_local, y_start+ny_local, z_start+nz_local
+
+    # Get the local number of particles
+    num_particles_local = source_file.attrs['n_particles_local'][0]
+
+    # write data from individual processor file to correct location in concatenated file
+    for dataset in list(destination_file.keys()):
+
+      if dataset == 'density':
+        destination_file[dataset][x_start:x_end,
+                                  y_start:y_end,
+                                  z_start:z_end] = source_file[dataset]
+      else:
+        start = particles_offset
+        end   = particles_offset + num_particles_local
+        destination_file[dataset][start:end] = source_file[dataset]
+
+    # Update the particles offset
+    particles_offset += num_particles_local
+
+    # Now that the copy is done we close the source file
+    source_file.close()
+
+  # Close destination file now that it is fully constructed
+  destination_file.close()
+# ==============================================================================
+
+# ==============================================================================
+def __get_num_particles(build_source_path,
+                        num_processes: int,
+                        output_number: int) -> int:
+  """Get the total number of particles in the output. This function is heavily
+  I/O bound and might benefit from utilizing threads.
+
+  Parameters
+  ----------
+  build_source_path : callable
+      A function used to construct the paths to the files that are to be concatenated.
+  num_processes : int
+      The number of processes
+  output_number : int
+      The output number to get data from
+
+  Returns
+  -------
+  int
+      The total number of particles in the output
+  """
+  # loop over files for a given output
+  num_particles = 0
+  for i in range(0, num_processes):
+    # open the input file for reading
+    with h5py.File(build_source_path(proc_id = i, nfile = output_number), 'r') as source_file:
+      num_particles += source_file.attrs['n_particles_local']
+
+  return num_particles
+# ==============================================================================
+
+# ==============================================================================
+def __setup_destination_file(build_source_path,
+                             destination_file: h5py.File,
+                             output_number: int,
+                             num_particles: int,
+                             skip_fields: list,
+                             destination_dtype: np.dtype,
+                             compression_type: str,
+                             compression_options: str,
+                             chunking) -> h5py.File:
+  """Setup the destination file by copying the header and setting up the datasets
+
+  Parameters
+  ----------
+  build_source_path : callable
+      A function used to construct the paths to the files that are to be concatenated.
+  destination_file : h5py.File
+      The destination file
+  output_number : int
+      The output number to concatenate
+  num_particles : int
+      The total number of particles in the output
+  skip_fields : list
+      List of fields to skip concatenating.
+  destination_dtype : np.dtype
+      The data type of the output datasets. Accepts most numpy types.
+  compression_type : str
+      What kind of compression to use on the output data.
+  compression_options : str
+      What compression settings to use if compressing.
+  chunking : _type_
+      Whether or not to use chunking and the chunk size.
+
+  Returns
+  -------
+  h5py.File
+      The fully set up destination file
+  """
+  with h5py.File(build_source_path(proc_id = 0, nfile = output_number), 'r') as source_file:
+    # Copy header data
+    destination_file = concat_internals.copy_header(source_file, destination_file)
+
+    # Make list of datasets to copy
+    datasets_to_copy = list(source_file.keys())
+    datasets_to_copy = [dataset for dataset in datasets_to_copy if not dataset in skip_fields]
+
+    # Create the datasets in the output file
+    for dataset in datasets_to_copy:
+      dtype = source_file[dataset].dtype if (destination_dtype == None) else destination_dtype
+
+      # Determine the shape of the dataset
+      if dataset == 'density':
+        data_shape = source_file.attrs['dims']
+      else:
+        data_shape = num_particles
+
+      # Create the dataset
+      destination_file.create_dataset(name=dataset,
+                                      shape=data_shape,
+                                      dtype=dtype,
+                                      chunks=chunking,
+                                      compression=compression_type,
+                                      compression_opts=compression_options)
+
+  return destination_file
+# ==============================================================================
+
+if __name__ == '__main__':
+  from timeit import default_timer
+  start = default_timer()
+
+  cli = concat_internals.common_cli()
+  args = cli.parse_args()
+
+  build_source_path = concat_internals.get_source_path_builder(
+    source_directory = args.source_directory,
+    pre_extension_suffix = f'_particles',
+    known_output_snap = args.concat_outputs[0])
+
+  # Perform the concatenation
+  for output in args.concat_outputs:
+    concat_particles_dataset(output_directory=args.output_directory,
+                             num_processes=args.num_processes,
+                             output_number=output,
+                             build_source_path = build_source_path,
+                             skip_fields=args.skip_fields,
+                             destination_dtype=args.dtype,
+                             compression_type=args.compression_type,
+                             compression_options=args.compression_opts,
+                             chunking=args.chunking)
+
+  print(f'\nTime to execute: {round(default_timer()-start,2)} seconds')
diff --git a/python_scripts/dask_distributed_template.py b/python_scripts/dask_distributed_template.py
new file mode 100755
index 000000000..ac40294b2
--- /dev/null
+++ b/python_scripts/dask_distributed_template.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env python3
+"""
+This is the skeleton for how to run a Dask script on Andes at the OLCF. The CLI
+commands required are in the docstring at the top, major Dask steps are in
+functions, and `main` is mostly empty with a clear area on where to do your
+computations.
+
+Requirements: - Verified working with Dask v2023.6.0 - Install graphviz for
+python
+  - 'conda install -c conda-forge python-graphviz graphviz'
+  - Make sure your version of msgpack-python is at least v1.0.5; v1.0.3 had a bug
+    - `conda install -c conda-forge msgpack-python=1.0.5`
+
+Notes:
+- This is entirely focused on getting Dask to run on Andes, Crusher, and
+  Frontier. Other systems will likely need similar steps but not identical
+- Between each python script the Dask scheduler and workers need to be
+  restarted.
+- "--interface ib0" does not seem to be required but likely does improve
+  transfer speeds. On Crusher it throws an error, just omit it
+- It likes to spit out lots of ugly messages on shutdown that look like
+  something failed. Odds are that it worked fine and just didn't shutdown
+  gracefully
+- On OLCF systems Dask seems to hang on setup if you use more than 256
+  processes. I haven't dug too deeply into it but for now it might be better to
+  limit jobs to that size and run them longer or run multiple jobs, potentially
+  an array job
+- On OLCF systems it doesn't always end the job properly and the job will just
+  keep running and do nothing. Either set short walltimes so it times out or
+  just keep an eye on it. Maybe end with the script sending an exit command
+
+################################################################################
+#!/usr/bin/env bash
+
+#SBATCH -A <allocation here>
+#SBATCH -J <job name>
+#SBATCH -o <slurm output file>/%x-%j.out
+#SBATCH -t 04:00:00
+#SBATCH -p batch
+#SBATCH -N 32
+#SBATCH --mail-user=<your email> #SBATCH --mail-type=ALL
+
+# Setup some parameters DASK_SCHEDULE_FILE=$(pwd)/dask_schedule_file.json
+DASK_NUM_WORKERS=$((SLURM_JOB_NUM_NODES*8))
+
+# Add any scripts that you're importing to the PYTHONPATH, even ones in the same
+# directory. The worker tasks have their own directories and won't find any of
+# your scripts unless they're in the PYTHONPATH
+export PYTHONPATH="${PYTHONPATH}:/your/path/here"
+
+INTERFACE='--interface ib0' # For Andes
+# INTERFACE='' # For Crusher
+
+srun --exclusive --ntasks=1 dask scheduler $INTERFACE --scheduler-file $DASK_SCHEDULE_FILE --no-dashboard --no-show &
+
+# Wait for the dask-scheduler to start
+sleep 30
+
+srun --exclusive --ntasks=$DASK_NUM_WORKERS dask worker --scheduler-file $DASK_SCHEDULE_FILE --memory-limit='auto' --worker-class distributed.Worker $INTERFACE --no-dashboard --local-directory <path to directory, might not be required> &
+
+# Wait for workers to start
+sleep 10
+
+python -u ./dask-distributed-template.py --scheduler-file $DASK_SCHEDULE_FILE --num-workers $DASK_NUM_WORKERS
+
+wait
+################################################################################
+"""
+
+import dask
+import dask.array as da
+import dask.dataframe as dd
+from dask.distributed import Client
+from dask import graph_manipulation
+
+import pathlib
+import argparse
+
+# ==============================================================================
+def main():
+    # Get command line arguments
+    cli = argparse.ArgumentParser()
+    # Required Arguments
+    cli.add_argument('-N', '--num-workers',    type=int,          required=True, help='The number of workers to use')
+    cli.add_argument('-s', '--scheduler-file', type=pathlib.Path, required=True, help='The path to the scheduler file')
+    # Optional Arguments
+    # none yet, feel free to add your own
+    args = cli.parse_args()
+
+    # Setup the Dask cluster
+    client = startup_dask(args.scheduler_file, args.num_workers)
+
+    # Perform your computation
+    # ...
+    # ...
+    # ...
+    # Some suggestions:
+    # - If you're using Delayed then append all tasks to a list and execute them with `dask.compute(*command_list)`
+    # - Visualize task tree with `dask.visualize(*command_list, filename=str('filename.pdf'))
+    # - Add dependencies manually with `dask.graph_manipulation.bind(dependent_task, list_of_dependencies)`
+    # End of Computation
+
+    # Shutdown the Dask cluster
+    shutdown_dask(client)
+# ==============================================================================
+
+# ==============================================================================
+def startup_dask(scheduler_file, num_workers):
+    # Connect to the dask-cluster
+    client = Client(scheduler_file=scheduler_file)
+    print('client information ', client)
+
+    # Block until num_workers are ready
+    print(f'Waiting for {num_workers} workers...')
+    client.wait_for_workers(n_workers=num_workers)
+
+    num_connected_workers = len(client.scheduler_info()['workers'])
+    print(f'{num_connected_workers} workers connected')
+
+    return client
+# ==============================================================================
+
+# ==============================================================================
+def shutdown_dask(client):
+    print('Shutting down the cluster')
+    workers_list = list(client.scheduler_info()['workers'])
+    client.retire_workers(workers_list, close_workers=True)
+    client.shutdown()
+# ==============================================================================
+
+if __name__ == '__main__':
+    main()
diff --git a/python_scripts/dask_single_machine_template.py b/python_scripts/dask_single_machine_template.py
new file mode 100755
index 000000000..7816ec791
--- /dev/null
+++ b/python_scripts/dask_single_machine_template.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+"""
+================================================================================
+ Written by Robert Caddy.
+
+ A simple template for Dask scripts running on a single machine
+================================================================================
+"""
+
+import dask
+import dask.array as da
+import dask.dataframe as dd
+from dask import graph_manipulation
+
+import argparse
+import pathlib
+
+# ==============================================================================
+def main():
+    cli = argparse.ArgumentParser()
+    # Required Arguments
+    # Optional Arguments
+    cli.add_argument('-n', '--num-workers', type=int, default=8, help='The number of workers to use.')
+    args = cli.parse_args()
+
+    # Set scheduler type. Options are 'threads', 'processes', 'single-threaded', and 'distributed'.
+    # - 'threads' uses threads that share memory, often fastest on single machines, can run into issuse with the GIL
+    # - 'processes' uses multiple processes that do not share memory, can be used to get around issues with the GIL
+    # - `single-threaded` is great for debugging
+    dask.config.set(scheduler='processes', num_workers=args.num_workers)
+
+    # Perform your computation
+    # ...
+    # ...
+    # ...
+    # Some suggestions:
+    # - If you're using Delayed then append all tasks to a list and execute them with `dask.compute(*command_list)`
+    # - Visualize task tree with `dask.visualize(*command_list, filename=str('filename.pdf'))
+    # - Add dependencies manually with `dask.graph_manipulation.bind(dependent_task, list_of_dependencies)`
+    # End of Computation
+# ==============================================================================
+
+if __name__ == '__main__':
+    from timeit import default_timer
+    start = default_timer()
+    main()
+    print(f'\nTime to execute: {round(default_timer()-start,2)} seconds')
diff --git a/src/analysis/analysis.cpp b/src/analysis/analysis.cpp
index af147e776..d9eede2f1 100644
--- a/src/analysis/analysis.cpp
+++ b/src/analysis/analysis.cpp
@@ -1,71 +1,68 @@
 #ifdef ANALYSIS
 
-#include <stdio.h>
-#include "../analysis/analysis.h"
-#include "../io/io.h"
+  #include "../analysis/analysis.h"
 
+  #include <stdio.h>
 
-Analysis_Module::Analysis_Module( void ){}
+  #include "../io/io.h"
 
-#ifdef LYA_STATISTICS
-void Grid3D::Compute_Lya_Statistics( ){
+AnalysisModule::AnalysisModule(void) {}
 
+  #ifdef LYA_STATISTICS
+void Grid3D::Compute_Lya_Statistics()
+{
   int axis, n_skewers;
   Real time_start, time_end, time_elapsed;
-  time_start = get_time();
-  
+  time_start = Get_Time();
+
   // Copmpute Lya Statitics
-  chprintf( "Computing Lya Absorbiton along skewers \n");
-  for ( axis=0; axis<3; axis++ ){
-  
-    if ( axis == 0 ) n_skewers = Analysis.n_skewers_local_x;
-    if ( axis == 1 ) n_skewers = Analysis.n_skewers_local_y;
-    if ( axis == 2 ) n_skewers = Analysis.n_skewers_local_z;
-  
-    if ( axis == 0 ) chprintf( " Computing Along X axis: ");
-    if ( axis == 1 ) chprintf( " Computing Along Y axis: ");
-    if ( axis == 2 ) chprintf( " Computing Along Z axis: ");
-  
-  
-    Populate_Lya_Skewers_Local( axis );
-    Analysis.Initialize_Lya_Statistics_Measurements( axis );
-    Analysis.Transfer_Skewers_Data( axis );
-
-    for ( int skewer_id=0; skewer_id< n_skewers; skewer_id++ ){
-      Compute_Transmitted_Flux_Skewer( skewer_id, axis );
-      Analysis.Compute_Lya_Mean_Flux_Skewer( skewer_id, axis );
+  chprintf("Computing Lya Absorbiton along skewers \n");
+  for (axis = 0; axis < 3; axis++) {
+    if (axis == 0) n_skewers = Analysis.n_skewers_local_x;
+    if (axis == 1) n_skewers = Analysis.n_skewers_local_y;
+    if (axis == 2) n_skewers = Analysis.n_skewers_local_z;
+
+    if (axis == 0) chprintf(" Computing Along X axis: ");
+    if (axis == 1) chprintf(" Computing Along Y axis: ");
+    if (axis == 2) chprintf(" Computing Along Z axis: ");
+
+    Populate_Lya_Skewers_Local(axis);
+    Analysis.Initialize_Lya_Statistics_Measurements(axis);
+    Analysis.Transfer_Skewers_Data(axis);
+
+    for (int skewer_id = 0; skewer_id < n_skewers; skewer_id++) {
+      Compute_Transmitted_Flux_Skewer(skewer_id, axis);
+      Analysis.Compute_Lya_Mean_Flux_Skewer(skewer_id, axis);
     }
-    Analysis.Reduce_Lya_Mean_Flux_Axis( axis );
-    
+    Analysis.Reduce_Lya_Mean_Flux_Axis(axis);
+
     #ifdef OUTPUT_SKEWERS
-    Analysis.Transfer_Skewers_Global_Axis( axis );
+    Analysis.Transfer_Skewers_Global_Axis(axis);
     #endif
-  
-  }  
+  }
   Analysis.Reduce_Lya_Mean_Flux_Global();
 
   // if( Analysis.Flux_mean_HI > 1e-10 ){
-  
-  // Compute the Flux Power Spectrum after computing the mean transmitted flux 
-  for ( axis=0; axis<3; axis++ ){
 
-    if ( axis == 0 ) n_skewers = Analysis.n_skewers_local_x;
-    if ( axis == 1 ) n_skewers = Analysis.n_skewers_local_y;
-    if ( axis == 2 ) n_skewers = Analysis.n_skewers_local_z;
+  // Compute the Flux Power Spectrum after computing the mean transmitted flux
+  for (axis = 0; axis < 3; axis++) {
+    if (axis == 0) n_skewers = Analysis.n_skewers_local_x;
+    if (axis == 1) n_skewers = Analysis.n_skewers_local_y;
+    if (axis == 2) n_skewers = Analysis.n_skewers_local_z;
 
-    if ( axis == 0 ) chprintf( " Computing P(k) Along X axis\n");
-    if ( axis == 1 ) chprintf( " Computing P(k) Along Y axis\n");
-    if ( axis == 2 ) chprintf( " Computing P(k) Along Z axis\n");
+    if (axis == 0) chprintf(" Computing P(k) Along X axis\n");
+    if (axis == 1) chprintf(" Computing P(k) Along Y axis\n");
+    if (axis == 2) chprintf(" Computing P(k) Along Z axis\n");
 
-    Initialize_Power_Spectrum_Measurements( axis );
+    Initialize_Power_Spectrum_Measurements(axis);
 
-    for ( int skewer_id=0; skewer_id< n_skewers; skewer_id++ ){
-      Compute_Flux_Power_Spectrum_Skewer( skewer_id, axis );
+    for (int skewer_id = 0; skewer_id < n_skewers; skewer_id++) {
+      Compute_Flux_Power_Spectrum_Skewer(skewer_id, axis);
     }
-  
-    Analysis.Reduce_Power_Spectrum_Axis( axis );
+
+    Analysis.Reduce_Power_Spectrum_Axis(axis);
   }
-  
+
   Analysis.Reduce_Power_Spectrum_Global();
   Analysis.Computed_Flux_Power_Spectrum = 1;
 
@@ -73,27 +70,26 @@ void Grid3D::Compute_Lya_Statistics( ){
   //   Analysis.Computed_Flux_Power_Spectrum = 0;
   // }
 
-  time_end = get_time();
-  time_elapsed = (time_end - time_start)*1000;
-  chprintf( "Analysis Time: %f9.1 ms \n", time_elapsed );
+  time_end     = Get_Time();
+  time_elapsed = (time_end - time_start) * 1000;
+  chprintf("Analysis Time: %f9.1 ms \n", time_elapsed);
 }
-#endif //LYA_STATISTICS
-
-
-void Grid3D::Compute_and_Output_Analysis( struct parameters *P ){
+  #endif  // LYA_STATISTICS
 
+void Grid3D::Compute_and_Output_Analysis(struct Parameters *P)
+{
   #ifdef COSMOLOGY
-  chprintf("\nComputing Analysis  current_z: %f\n", Analysis.current_z );
-  #else 
+  chprintf("\nComputing Analysis  current_z: %f\n", Analysis.current_z);
+  #else
   chprintf("\nComputing Analysis \n");
   #endif
-  
-  cudaMemcpy( C.density, C.device, H.n_fields*H.n_cells*sizeof(Real), cudaMemcpyDeviceToHost);
+
+  cudaMemcpy(C.density, C.device, H.n_fields * H.n_cells * sizeof(Real), cudaMemcpyDeviceToHost);
 
   #ifdef PHASE_DIAGRAM
-  #ifdef CHEMISTRY_GPU
-  Compute_Gas_Temperature( Chem.Fields.temperature_h, true ); 
-  #endif
+    #ifdef CHEMISTRY_GPU
+  Compute_Gas_Temperature(Chem.Fields.temperature_h, true);
+    #endif
   Compute_Phase_Diagram();
   #endif
 
@@ -101,40 +97,39 @@ void Grid3D::Compute_and_Output_Analysis( struct parameters *P ){
   Compute_Lya_Statistics();
   #endif
 
-  //Write to HDF5 file
-  #ifdef MPI_CHOLLA
-  if ( procID == 0 ) Output_Analysis(P);
-  #else
+  // Write to HDF5 file
+  #if defined(COSMOLOGY) || defined(PHASE_DIAGRAM) || defined(LYA_STATISTICS)
+    #ifdef MPI_CHOLLA
+  if (procID == 0) Output_Analysis(P);
+    #else
   Output_Analysis(P);
+    #endif
   #endif
 
-
   #ifdef LYA_STATISTICS
   if (Analysis.Computed_Flux_Power_Spectrum == 1) Analysis.Clear_Power_Spectrum_Measurements();
   #endif
 
   #ifdef COSMOLOGY
   Analysis.Set_Next_Scale_Output();
-  #endif 
+  #endif
 
   Analysis.Output_Now = false;
 
-
   // exit(0);
 }
 
+void Grid3D::Initialize_AnalysisModule(struct Parameters *P)
+{
+  chprintf("\nInitializng Analysis Module...\n");
 
-
-void Grid3D::Initialize_Analysis_Module( struct parameters *P ){
-  
-  chprintf( "\nInitializng Analysis Module...\n");
-  
   #ifndef MPI_CHOLLA
-  chprintf( "The Analysys Module is implemented for the MPI version only... sorry!\n ");
+  chprintf(
+      "The Analysys Module is implemented for the MPI version only... "
+      "sorry!\n ");
   exit(-1);
   #endif
-  
-  
+
   Real z_now;
   #ifdef COSMOLOGY
   z_now = Cosmo.current_z;
@@ -142,41 +137,43 @@ void Grid3D::Initialize_Analysis_Module( struct parameters *P ){
   z_now = 0;
   #endif
 
-  Analysis.Initialize( H.xdglobal, H.ydglobal, H.zdglobal, H.xblocal, H.yblocal, H.zblocal, P->nx, P->ny, P->nz, H.nx_real, H.ny_real, H.nz_real, H.dx, H.dy, H.dz, H.n_ghost, z_now, P );
-
+  Analysis.Initialize(H.xdglobal, H.ydglobal, H.zdglobal, H.xblocal, H.yblocal, H.zblocal, P->nx, P->ny, P->nz,
+                      H.nx_real, H.ny_real, H.nz_real, H.dx, H.dy, H.dz, H.n_ghost, z_now, P);
 }
 
-void Analysis_Module::Initialize( Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real, int n_ghost_hydro, Real z_now, struct parameters *P ){
-
-  //Domain Length
+void AnalysisModule::Initialize(Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz,
+                                int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real,
+                                int n_ghost_hydro, Real z_now, struct Parameters *P)
+{
+  // Domain Length
   Lbox_x = Lx;
   Lbox_y = Ly;
   Lbox_z = Lz;
 
-  //Left Boundaries of Local domain
+  // Left Boundaries of Local domain
   xMin = x_min;
   yMin = y_min;
   zMin = z_min;
 
-  //Cell sizes
+  // Cell sizes
   dx = dx_real;
   dy = dy_real;
   dz = dz_real;
 
-  //Size of Global Domain
+  // Size of Global Domain
   nx_total = nx;
   ny_total = ny;
   nz_total = nz;
 
-  //Size of Local Domain
+  // Size of Local Domain
   nx_local = nx_real;
   ny_local = ny_real;
   nz_local = nz_real;
 
-  //Number of ghost cells in the conserved arrays
+  // Number of ghost cells in the conserved arrays
   n_ghost = n_ghost_hydro;
 
-  //Domain Global left Boundary
+  // Domain Global left Boundary
   xMin_global = P->xmin;
   yMin_global = P->ymin;
   zMin_global = P->zmin;
@@ -184,7 +181,7 @@ void Analysis_Module::Initialize( Real Lx, Real Ly, Real Lz, Real x_min, Real y_
   #ifdef COSMOLOGY
   current_z = z_now;
 
-  //Load values of scale factor for analysis outputs
+  // Load values of scale factor for analysis outputs
   Load_Scale_Outputs(P);
   #endif
 
@@ -196,139 +193,128 @@ void Analysis_Module::Initialize( Real Lx, Real Ly, Real Lz, Real x_min, Real y_
   Initialize_Lya_Statistics(P);
   #endif
 
-  chprintf( "Analysis Module Successfully Initialized.\n\n");
-
-
+  chprintf("Analysis Module Successfully Initialized.\n\n");
 }
 
-
-
-
-
-void Analysis_Module::Reset(){
-
+void AnalysisModule::Reset()
+{
   #ifdef PHASE_DIAGRAM
   free(phase_diagram);
   #endif
 
   #ifdef LYA_STATISTICS
-  free( skewers_HI_density_local_x );
-  free( skewers_HI_density_local_y );
-  free( skewers_HI_density_local_z );
-  free( skewers_HeII_density_local_x );
-  free( skewers_HeII_density_local_y );
-  free( skewers_HeII_density_local_z );
-  free( skewers_velocity_local_x );
-  free( skewers_velocity_local_y );
-  free( skewers_velocity_local_z );
-  free( skewers_temperature_local_x );
-  free( skewers_temperature_local_y );
-  free( skewers_temperature_local_z );
-  #ifdef OUTPUT_SKEWERS
-  free( skewers_density_local_x );
-  free( skewers_density_local_y );
-  free( skewers_density_local_z ); 
-  #endif
-  
-  #ifdef MPI_CHOLLA
-  
-  if ( procID == 0 ){
-    free( root_procs_x );
-    free( root_procs_y );
-    free( root_procs_z );
+  free(skewers_HI_density_local_x);
+  free(skewers_HI_density_local_y);
+  free(skewers_HI_density_local_z);
+  free(skewers_HeII_density_local_x);
+  free(skewers_HeII_density_local_y);
+  free(skewers_HeII_density_local_z);
+  free(skewers_velocity_local_x);
+  free(skewers_velocity_local_y);
+  free(skewers_velocity_local_z);
+  free(skewers_temperature_local_x);
+  free(skewers_temperature_local_y);
+  free(skewers_temperature_local_z);
     #ifdef OUTPUT_SKEWERS
-    free( transfer_buffer_root_x );
-    free( transfer_buffer_root_y );
-    free( transfer_buffer_root_z );
-    free( skewers_transmitted_flux_HI_x_global );
-    free( skewers_transmitted_flux_HI_y_global );
-    free( skewers_transmitted_flux_HI_z_global );
-    free( skewers_transmitted_flux_HeII_x_global );
-    free( skewers_transmitted_flux_HeII_y_global );
-    free( skewers_transmitted_flux_HeII_z_global );
-    free( skewers_density_x_global );
-    free( skewers_density_y_global );
-    free( skewers_density_z_global );
-    free( skewers_HI_density_x_global );
-    free( skewers_HI_density_y_global );
-    free( skewers_HI_density_z_global );
-    free( skewers_HeII_density_x_global );
-    free( skewers_HeII_density_y_global );
-    free( skewers_HeII_density_z_global );
-    free( skewers_temperature_x_global );
-    free( skewers_temperature_y_global );
-    free( skewers_temperature_z_global );
-    free( skewers_los_velocity_x_global );
-    free( skewers_los_velocity_y_global );
-    free( skewers_los_velocity_z_global );
-    
-    #endif
-  }
-  
-  if ( am_I_root_x ){
-    free( skewers_HI_density_root_x );
-    free( skewers_HeII_density_root_x );
-    free( skewers_velocity_root_x );
-    free( skewers_temperature_root_x );
-    free( full_HI_density_x );
-    free( full_HeII_density_x );
-    free( full_velocity_x );
-    free( full_temperature_x );
-    free( full_optical_depth_HI_x );
-    free( full_optical_depth_HeII_x );
-    free( full_vel_Hubble_x );
-    free( skewers_transmitted_flux_HI_x );
-    free( skewers_transmitted_flux_HeII_x );
-    #ifdef OUTPUT_SKEWERS
-    free( skewers_density_root_x );  
+  free(skewers_density_local_x);
+  free(skewers_density_local_y);
+  free(skewers_density_local_z);
     #endif
+
+    #ifdef MPI_CHOLLA
+
+  if (procID == 0) {
+    free(root_procs_x);
+    free(root_procs_y);
+    free(root_procs_z);
+      #ifdef OUTPUT_SKEWERS
+    free(transfer_buffer_root_x);
+    free(transfer_buffer_root_y);
+    free(transfer_buffer_root_z);
+    free(skewers_transmitted_flux_HI_x_global);
+    free(skewers_transmitted_flux_HI_y_global);
+    free(skewers_transmitted_flux_HI_z_global);
+    free(skewers_transmitted_flux_HeII_x_global);
+    free(skewers_transmitted_flux_HeII_y_global);
+    free(skewers_transmitted_flux_HeII_z_global);
+    free(skewers_density_x_global);
+    free(skewers_density_y_global);
+    free(skewers_density_z_global);
+    free(skewers_HI_density_x_global);
+    free(skewers_HI_density_y_global);
+    free(skewers_HI_density_z_global);
+    free(skewers_HeII_density_x_global);
+    free(skewers_HeII_density_y_global);
+    free(skewers_HeII_density_z_global);
+    free(skewers_temperature_x_global);
+    free(skewers_temperature_y_global);
+    free(skewers_temperature_z_global);
+    free(skewers_los_velocity_x_global);
+    free(skewers_los_velocity_y_global);
+    free(skewers_los_velocity_z_global);
+
+      #endif
   }
 
-  if ( am_I_root_y ){
-    free( skewers_HI_density_root_y );
-    free( skewers_HeII_density_root_y );
-    free( skewers_velocity_root_y );
-    free( skewers_temperature_root_y );
-    free( full_HI_density_y );
-    free( full_HeII_density_y );
-    free( full_velocity_y );
-    free( full_temperature_y );
-    free( full_optical_depth_HI_y );
-    free( full_optical_depth_HeII_y );
-    free( full_vel_Hubble_y );
-    free( skewers_transmitted_flux_HI_y );
-    free( skewers_transmitted_flux_HeII_y );
-    #ifdef OUTPUT_SKEWERS
-    free( skewers_density_root_y );  
-    #endif
+  if (am_I_root_x) {
+    free(skewers_HI_density_root_x);
+    free(skewers_HeII_density_root_x);
+    free(skewers_velocity_root_x);
+    free(skewers_temperature_root_x);
+    free(full_HI_density_x);
+    free(full_HeII_density_x);
+    free(full_velocity_x);
+    free(full_temperature_x);
+    free(full_optical_depth_HI_x);
+    free(full_optical_depth_HeII_x);
+    free(full_vel_Hubble_x);
+    free(skewers_transmitted_flux_HI_x);
+    free(skewers_transmitted_flux_HeII_x);
+      #ifdef OUTPUT_SKEWERS
+    free(skewers_density_root_x);
+      #endif
   }
 
-  if ( am_I_root_z ){
-    free( skewers_HI_density_root_z );
-    free( skewers_HeII_density_root_z );
-    free( skewers_velocity_root_z );
-    free( skewers_temperature_root_z );
-    free( full_HI_density_z );
-    free( full_HeII_density_z );
-    free( full_velocity_z );
-    free( full_temperature_z );
-    free( full_optical_depth_HI_z );
-    free( full_optical_depth_HeII_z );
-    free( full_vel_Hubble_z );
-    free( skewers_transmitted_flux_HI_z );
-    free( skewers_transmitted_flux_HeII_z );
-    #ifdef OUTPUT_SKEWERS
-    free( skewers_density_root_z );  
-    #endif
+  if (am_I_root_y) {
+    free(skewers_HI_density_root_y);
+    free(skewers_HeII_density_root_y);
+    free(skewers_velocity_root_y);
+    free(skewers_temperature_root_y);
+    free(full_HI_density_y);
+    free(full_HeII_density_y);
+    free(full_velocity_y);
+    free(full_temperature_y);
+    free(full_optical_depth_HI_y);
+    free(full_optical_depth_HeII_y);
+    free(full_vel_Hubble_y);
+    free(skewers_transmitted_flux_HI_y);
+    free(skewers_transmitted_flux_HeII_y);
+      #ifdef OUTPUT_SKEWERS
+    free(skewers_density_root_y);
+      #endif
   }
 
+  if (am_I_root_z) {
+    free(skewers_HI_density_root_z);
+    free(skewers_HeII_density_root_z);
+    free(skewers_velocity_root_z);
+    free(skewers_temperature_root_z);
+    free(full_HI_density_z);
+    free(full_HeII_density_z);
+    free(full_velocity_z);
+    free(full_temperature_z);
+    free(full_optical_depth_HI_z);
+    free(full_optical_depth_HeII_z);
+    free(full_vel_Hubble_z);
+    free(skewers_transmitted_flux_HI_z);
+    free(skewers_transmitted_flux_HeII_z);
+      #ifdef OUTPUT_SKEWERS
+    free(skewers_density_root_z);
+      #endif
+  }
 
+    #endif
   #endif
-  #endif
-
-
 }
 
-
-
 #endif
diff --git a/src/analysis/analysis.h b/src/analysis/analysis.h
index 096d6b6bd..59ccb050a 100644
--- a/src/analysis/analysis.h
+++ b/src/analysis/analysis.h
@@ -1,20 +1,19 @@
 #ifdef ANALYSIS
 
-#ifndef ANALYSIS_H
-#define ANALYSIS_H
+  #ifndef ANALYSIS_H
+    #define ANALYSIS_H
 
-#include "../global/global.h"
-#include <vector>
+    #include <vector>
 
-#ifdef LYA_STATISTICS
-#include <fftw3.h>
-#endif
-
-using namespace std;
+    #include "../global/global.h"
 
-class Analysis_Module{
-public:
+    #ifdef LYA_STATISTICS
+      #include <fftw3.h>
+    #endif
 
+class AnalysisModule
+{
+ public:
   Real Lbox_x;
   Real Lbox_y;
   Real Lbox_z;
@@ -47,13 +46,11 @@ class Analysis_Module{
   bool Output_Now;
   int n_file;
 
-  #ifdef COSMOLOGY
+    #ifdef COSMOLOGY
   Real current_z;
-  #endif
+    #endif
 
-
-
-  #ifdef PHASE_DIAGRAM
+    #ifdef PHASE_DIAGRAM
   int n_dens;
   int n_temp;
   Real temp_min;
@@ -61,13 +58,12 @@ class Analysis_Module{
   Real dens_min;
   Real dens_max;
   float *phase_diagram;
-  #ifdef MPI_CHOLLA
+      #ifdef MPI_CHOLLA
   float *phase_diagram_global;
-  #endif
-  #endif
-
+      #endif
+    #endif
 
-  #ifdef LYA_STATISTICS
+    #ifdef LYA_STATISTICS
   int Computed_Flux_Power_Spectrum;
   int n_stride;
   int n_skewers_local_x;
@@ -128,7 +124,6 @@ class Analysis_Module{
   Real *full_HI_density_y;
   Real *full_HI_density_z;
 
-
   Real *full_HeII_density_x;
   Real *full_HeII_density_y;
   Real *full_HeII_density_z;
@@ -140,91 +135,89 @@ class Analysis_Module{
   Real *full_temperature_x;
   Real *full_temperature_y;
   Real *full_temperature_z;
-  
+
   Real *full_optical_depth_HI_x;
   Real *full_optical_depth_HI_y;
   Real *full_optical_depth_HI_z;
-  
+
   Real *full_optical_depth_HeII_x;
   Real *full_optical_depth_HeII_y;
   Real *full_optical_depth_HeII_z;
-  
+
   Real *full_vel_Hubble_x;
   Real *full_vel_Hubble_y;
   Real *full_vel_Hubble_z;
-  
+
   Real *skewers_transmitted_flux_HI_x;
   Real *skewers_transmitted_flux_HI_y;
   Real *skewers_transmitted_flux_HI_z;
-  
+
   Real *skewers_transmitted_flux_HeII_x;
   Real *skewers_transmitted_flux_HeII_y;
   Real *skewers_transmitted_flux_HeII_z;
-  
-  #ifdef OUTPUT_SKEWERS
-  
+
+      #ifdef OUTPUT_SKEWERS
+
   Real *skewers_density_local_x;
   Real *skewers_density_local_y;
   Real *skewers_density_local_z;
-  
+
   Real *skewers_density_root_x;
   Real *skewers_density_root_y;
   Real *skewers_density_root_z;
-  
+
   Real *skewers_density_x_global;
   Real *skewers_density_y_global;
   Real *skewers_density_z_global;
-  
-  
+
   Real *skewers_HI_density_x_global;
   Real *skewers_HI_density_y_global;
   Real *skewers_HI_density_z_global;
-  
+
   Real *skewers_HeII_density_x_global;
   Real *skewers_HeII_density_y_global;
   Real *skewers_HeII_density_z_global;
-  
+
   Real *skewers_temperature_x_global;
   Real *skewers_temperature_y_global;
   Real *skewers_temperature_z_global;
-  
+
   Real *skewers_los_velocity_x_global;
   Real *skewers_los_velocity_y_global;
   Real *skewers_los_velocity_z_global;
-  
+
   Real *skewers_transmitted_flux_HI_x_global;
   Real *skewers_transmitted_flux_HI_y_global;
   Real *skewers_transmitted_flux_HI_z_global;
-  
+
   Real *skewers_transmitted_flux_HeII_x_global;
   Real *skewers_transmitted_flux_HeII_y_global;
   Real *skewers_transmitted_flux_HeII_z_global;
-  
+
   Real *transfer_buffer_root_x;
   Real *transfer_buffer_root_y;
   Real *transfer_buffer_root_z;
-  #endif
-    
+      #endif
+
   Real Flux_mean_root_HI_x;
   Real Flux_mean_root_HI_y;
   Real Flux_mean_root_HI_z;
-  
+
   Real Flux_mean_root_HeII_x;
   Real Flux_mean_root_HeII_y;
   Real Flux_mean_root_HeII_z;
-    
+
   Real Flux_mean_HI_x;
   Real Flux_mean_HI_y;
   Real Flux_mean_HI_z;
-  
+
   Real Flux_mean_HeII_x;
   Real Flux_mean_HeII_y;
   Real Flux_mean_HeII_z;
-    
+
   Real Flux_mean_HI;
   Real Flux_mean_HeII;
 
-
   int n_skewers_processed;
 
   int n_ghost_skewer;
@@ -281,51 +274,48 @@ class Analysis_Module{
   Real *ps_global_z;
   Real *ps_mean;
   Real *k_centers;
-  
+
   bool *root_procs_x;
   bool *root_procs_y;
-  bool *root_procs_z; 
-  
-  #ifdef MPI_CHOLLA
+  bool *root_procs_z;
+
+      #ifdef MPI_CHOLLA
   Real *mpi_domain_boundary_x;
   Real *mpi_domain_boundary_y;
   Real *mpi_domain_boundary_z;
   vector<int> mpi_indices_x;
   vector<int> mpi_indices_y;
   vector<int> mpi_indices_z;
-  #endif
+      #endif
 
-  #endif
+    #endif
 
-
-  Analysis_Module( void );
-  void Initialize( Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real, int n_ghost_hydro, Real z_now, struct parameters *P );
+  AnalysisModule(void);
+  void Initialize(Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, int nx_real,
+                  int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real, int n_ghost_hydro, Real z_now,
+                  struct Parameters *P);
   void Reset(void);
 
-  void Load_Scale_Outputs( struct parameters *P );
-  void Set_Next_Scale_Output(  );
-
-
-
-  #ifdef PHASE_DIAGRAM
-  void Initialize_Phase_Diagram( struct parameters *P );
-  #endif
-
-  #ifdef LYA_STATISTICS
-  void Initialize_Lya_Statistics( struct parameters *P );
-  void Initialize_Lya_Statistics_Measurements( int axis );
-  void Transfer_Skewers_Data( int axis );
-  void Compute_Lya_Mean_Flux_Skewer( int skewer_id, int axis );
-  void Reduce_Lya_Mean_Flux_Axis( int axis );
-  void Reduce_Lya_Mean_Flux_Global( );
-  void Clear_Power_Spectrum_Measurements( void );
-  void Reduce_Power_Spectrum_Axis( int axis );
-  void Reduce_Power_Spectrum_Global( );
-  void Transfer_Skewers_Global_Axis( int axis );
-  #endif
+  void Load_Scale_Outputs(struct Parameters *P);
+  void Set_Next_Scale_Output();
+
+    #ifdef PHASE_DIAGRAM
+  void Initialize_Phase_Diagram(struct Parameters *P);
+    #endif
+
+    #ifdef LYA_STATISTICS
+  void Initialize_Lya_Statistics(struct Parameters *P);
+  void Initialize_Lya_Statistics_Measurements(int axis);
+  void Transfer_Skewers_Data(int axis);
+  void Compute_Lya_Mean_Flux_Skewer(int skewer_id, int axis);
+  void Reduce_Lya_Mean_Flux_Axis(int axis);
+  void Reduce_Lya_Mean_Flux_Global();
+  void Clear_Power_Spectrum_Measurements(void);
+  void Reduce_Power_Spectrum_Axis(int axis);
+  void Reduce_Power_Spectrum_Global();
+  void Transfer_Skewers_Global_Axis(int axis);
+    #endif
 };
 
-
-
-#endif
+  #endif
 #endif
diff --git a/src/analysis/feedback_analysis.cpp b/src/analysis/feedback_analysis.cpp
new file mode 100644
index 000000000..3dab7b6da
--- /dev/null
+++ b/src/analysis/feedback_analysis.cpp
@@ -0,0 +1,143 @@
+#include "feedback_analysis.h"
+
+#include "../io/io.h"
+#include "../model/disk_galaxy.h"
+
+#ifdef MPI_CHOLLA
+  #include "../mpi/mpi_routines.h"
+#endif
+
+#define VRMS_CUTOFF_DENSITY (0.01 * 0.6 * MP / DENSITY_UNIT)
+
+FeedbackAnalysis::FeedbackAnalysis(Grid3D& G)
+{
+  // allocate arrays
+  h_circ_vel_x = (Real*)malloc(G.H.n_cells * sizeof(Real));
+  h_circ_vel_y = (Real*)malloc(G.H.n_cells * sizeof(Real));
+
+#ifdef PARTICLES_GPU
+  GPU_Error_Check(cudaMalloc((void**)&d_circ_vel_x, G.H.n_cells * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc((void**)&d_circ_vel_y, G.H.n_cells * sizeof(Real)));
+#endif
+
+  // setup the (constant) circular speed arrays
+  int id;
+  Real vca, r, x, y, z;
+
+  for (int k = G.H.n_ghost; k < G.H.nz - G.H.n_ghost; k++) {
+    for (int j = G.H.n_ghost; j < G.H.ny - G.H.n_ghost; j++) {
+      for (int i = G.H.n_ghost; i < G.H.nx - G.H.n_ghost; i++) {
+        id = i + j * G.H.nx + k * G.H.nx * G.H.ny;
+
+        G.Get_Position(i, j, k, &x, &y, &z);
+        r = sqrt(x * x + y * y);
+
+        vca              = sqrt(r * fabs(galaxies::MW.gr_total_D3D(r, z)));
+        h_circ_vel_x[id] = -y / r * vca;
+        h_circ_vel_y[id] = x / r * vca;
+      }
+    }
+  }
+
+#ifdef PARTICLES_GPU
+  GPU_Error_Check(cudaMemcpy(d_circ_vel_x, h_circ_vel_x, G.H.n_cells * sizeof(Real), cudaMemcpyHostToDevice));
+  GPU_Error_Check(cudaMemcpy(d_circ_vel_y, h_circ_vel_y, G.H.n_cells * sizeof(Real), cudaMemcpyHostToDevice));
+#endif
+}
+
+FeedbackAnalysis::~FeedbackAnalysis()
+{
+  free(h_circ_vel_x);
+  free(h_circ_vel_y);
+#ifdef PARTICLES_GPU
+  GPU_Error_Check(cudaFree(d_circ_vel_x));
+  GPU_Error_Check(cudaFree(d_circ_vel_y));
+#endif
+}
+
+void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion(Grid3D& G)
+{
+#ifdef CPU_TIME
+  G.Timer.FeedbackAnalysis.Start();
+#endif
+
+#ifdef PARTICLES_CPU
+  int i, j, k, id, idm, idp;
+  int id_grav;
+  Real x, y, z, r, xpm, xpp, ypm, ypp, zpm, zpp;
+  Real Pm, Pp;
+  Real dPdx, dPdy, dPdr;
+  Real vx, vy, vz, vrms_poisson, vrms_analytic, vcp, vca, vcxp, vcyp, vcxa, vcya;
+  Real total_mass, partial_mass = 0, total_var_analytic = 0, total_var_poisson = 0, partial_var_poisson = 0,
+                   partial_var_analytic = 0;
+
+  int n_ghost_grav = G.Particles.G.n_ghost_particles_grid;
+  int ghost_diff   = n_ghost_grav - G.H.n_ghost;
+  int nx_grav      = G.Particles.G.nx_local + 2 * n_ghost_grav;
+  int ny_grav      = G.Particles.G.ny_local + 2 * n_ghost_grav;
+
+  for (k = 0; k < G.H.nz_real; k++) {
+    for (j = 0; j < G.H.ny_real; j++) {
+      for (i = 0; i < G.H.nx_real; i++) {
+        id = (i + G.H.n_ghost) + (j + G.H.n_ghost) * G.H.nx + (k + G.H.n_ghost) * G.H.nx * G.H.ny;
+        partial_mass += G.C.density[id];
+      }
+    }
+  }
+  #ifdef MPI_CHOLLA
+  MPI_Allreduce(&partial_mass, &total_mass, 1, MPI_CHREAL, MPI_SUM, world);
+  #else
+  total_mass = partial_mass;
+  #endif
+
+  for (k = G.H.n_ghost; k < G.H.nz - G.H.n_ghost; k++) {
+    for (j = G.H.n_ghost; j < G.H.ny - G.H.n_ghost; j++) {
+      for (i = G.H.n_ghost; i < G.H.nx - G.H.n_ghost; i++) {
+        id      = i + j * G.H.nx + k * G.H.nx * G.H.ny;
+        id_grav = (i + ghost_diff) + (j + ghost_diff) * nx_grav + (k + ghost_diff) * nx_grav * ny_grav;
+
+        if (G.C.density[id] < VRMS_CUTOFF_DENSITY) continue;  // in cgs, this is 0.01 cm^{-3}
+
+        G.Get_Position(i, j, k, &x, &y, &z);
+        r = sqrt(x * x + y * y);
+
+        vcp  = sqrt(r * fabs(G.Particles.G.gravity_x[id_grav] * x / r + G.Particles.G.gravity_y[id_grav] * y / r));
+        vcxp = -y / r * vcp;
+        vcyp = x / r * vcp;
+        vx   = G.C.momentum_x[id] / G.C.density[id];
+        vy   = G.C.momentum_y[id] / G.C.density[id];
+        vz   = G.C.momentum_z[id] / G.C.density[id];
+
+        partial_var_poisson += ((vx - vcxp) * (vx - vcxp) + (vy - vcyp) * (vy - vcyp) + vz * vz) * G.C.density[id];
+        partial_var_analytic += ((vx - h_circ_vel_x[id]) * (vx - h_circ_vel_x[id]) +
+                                 (vy - h_circ_vel_y[id]) * (vy - h_circ_vel_y[id]) + (vz * vz)) *
+                                G.C.density[id];
+      }
+    }
+  }
+  partial_var_poisson /= total_mass;
+  partial_var_analytic /= total_mass;
+
+  #ifdef MPI_CHOLLA
+  MPI_Reduce(&partial_var_poisson, &total_var_poisson, 1, MPI_CHREAL, MPI_SUM, root, world);
+  MPI_Reduce(&partial_var_analytic, &total_var_analytic, 1, MPI_CHREAL, MPI_SUM, root, world);
+
+  #else
+  total_var_poisson  = partial_var_poisson;
+  total_var_analytic = partial_var_analytic;
+  #endif
+
+  vrms_poisson  = sqrt(total_var_poisson) * VELOCITY_UNIT / 1e5;  // output in km/s
+  vrms_analytic = sqrt(total_var_analytic) * VELOCITY_UNIT / 1e5;
+
+  chprintf("feedback: time %f, dt=%f, vrms_p = %f km/s, vrms_a = %f km/s\n", G.H.t, G.H.dt, vrms_poisson,
+           vrms_analytic);
+
+#elif defined(PARTICLES_GPU)
+  Compute_Gas_Velocity_Dispersion_GPU(G);
+#endif  // PARTICLES_CPU
+
+#ifdef CPU_TIME
+  G.Timer.FeedbackAnalysis.End();
+#endif
+}
diff --git a/src/analysis/feedback_analysis.h b/src/analysis/feedback_analysis.h
new file mode 100644
index 000000000..9b29420f4
--- /dev/null
+++ b/src/analysis/feedback_analysis.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#include <stdio.h>
+
+#include "../global/global.h"
+#include "../grid/grid3D.h"
+
+class FeedbackAnalysis
+{
+  Real *h_circ_vel_x, *h_circ_vel_y;
+
+#ifdef PARTICLES_GPU
+  Real *d_circ_vel_x, *d_circ_vel_y;
+  void Compute_Gas_Velocity_Dispersion_GPU(Grid3D& G);
+#endif
+
+ public:
+  int countSN{0};
+  int countResolved{0};
+  int countUnresolved{0};
+  Real totalEnergy{0};
+  Real totalMomentum{0};
+  Real totalUnresEnergy{0};
+
+  FeedbackAnalysis(Grid3D& G);
+  ~FeedbackAnalysis();
+
+  void Compute_Gas_Velocity_Dispersion(Grid3D& G);
+  void Reset();
+};
\ No newline at end of file
diff --git a/src/analysis/feedback_analysis_gpu.cu b/src/analysis/feedback_analysis_gpu.cu
new file mode 100644
index 000000000..11132bece
--- /dev/null
+++ b/src/analysis/feedback_analysis_gpu.cu
@@ -0,0 +1,205 @@
+
+
+#include <cstdio>
+
+#include "../io/io.h"
+#include "feedback_analysis.h"
+#ifdef PARTICLES_GPU
+
+  #define MU 0.6
+  // in cgs, this is 0.01 cm^{-3}
+  #define MIN_DENSITY  (0.01 * MP * MU * LENGTH_UNIT * LENGTH_UNIT * LENGTH_UNIT / MASS_UNIT)  // 148279.7
+  #define TPB_ANALYSIS 1024
+
+__device__ void Warp_Reduce(volatile Real *buff, size_t tid)
+{
+  if (TPB_ANALYSIS >= 64) {
+    buff[tid] += buff[tid + 32];
+  }
+  if (TPB_ANALYSIS >= 32) {
+    buff[tid] += buff[tid + 16];
+  }
+  if (TPB_ANALYSIS >= 16) {
+    buff[tid] += buff[tid + 8];
+  }
+  if (TPB_ANALYSIS >= 8) {
+    buff[tid] += buff[tid + 4];
+  }
+  if (TPB_ANALYSIS >= 4) {
+    buff[tid] += buff[tid + 2];
+  }
+  if (TPB_ANALYSIS >= 2) {
+    buff[tid] += buff[tid + 1];
+  }
+}
+
+void __global__ Reduce_Tubulence_kernel(int nx, int ny, int nz, int n_ghost, Real *density, Real *momentum_x,
+                                        Real *momentum_y, Real *momentum_z, Real *circ_vel_x, Real *circ_vel_y,
+                                        Real *partial_mass, Real *partial_vel)
+{
+  __shared__ Real s_mass[TPB_ANALYSIS];
+  __shared__ Real s_vel[TPB_ANALYSIS];
+  int id, zid, yid, xid, tid;
+
+  id  = threadIdx.x + blockIdx.x * blockDim.x;
+  zid = id / (nx * ny);
+  yid = (id - zid * nx * ny) / nx;
+  xid = id - zid * nx * ny - yid * nx;
+  tid = threadIdx.x;
+
+  s_mass[tid] = 0;
+  s_vel[tid]  = 0;
+  Real vx, vy, vz;
+  if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 &&
+      zid < nz - n_ghost && density[id] > MIN_DENSITY) {
+    s_mass[tid] = density[id];
+    vx          = momentum_x[id] / density[id];
+    vy          = momentum_y[id] / density[id];
+    vz          = momentum_z[id] / density[id];
+    s_vel[tid] =
+        ((vx - circ_vel_x[id]) * (vx - circ_vel_x[id]) + (vy - circ_vel_y[id]) * (vy - circ_vel_y[id]) + (vz * vz)) *
+        density[id];
+  }
+  __syncthreads();
+
+  for (unsigned int s = blockDim.x / 2; s > 0; s >>= 1) {
+    if (tid < s) {
+      s_mass[tid] += s_mass[tid + s];
+      s_vel[tid] += s_vel[tid + s];
+    }
+    __syncthreads();
+  }
+  if (tid == 0) {
+    // printf("ReduceKernel 1: blockIdx.x = %d -> s_mass[0] = %.5e, s_vel[0] =
+    // %.5e\n", blockIdx.x, s_mass[0], s_vel[0]);
+    partial_mass[blockIdx.x] = s_mass[0];
+    partial_vel[blockIdx.x]  = s_vel[0];
+  }
+}
+
+void __global__ Reduce_Tubulence_kernel_2(Real *input_m, Real *input_v, Real *output_m, Real *output_v, int n)
+{
+  __shared__ Real s_mass[TPB_ANALYSIS];
+  __shared__ Real s_vel[TPB_ANALYSIS];
+
+  size_t tid      = threadIdx.x;
+  size_t i        = blockIdx.x * (TPB_ANALYSIS) + tid;
+  size_t gridSize = TPB_ANALYSIS * gridDim.x;
+  s_mass[tid]     = 0;
+  s_vel[tid]      = 0;
+
+  while (i < n) {
+    s_mass[tid] += input_m[i];
+    s_vel[tid] += input_v[i];
+    i += gridSize;
+  }
+  __syncthreads();
+
+  if (TPB_ANALYSIS >= 1024) {
+    if (tid < 512) {
+      s_mass[tid] += s_mass[tid + 512];
+      s_vel[tid] += s_vel[tid + 512];
+    }
+    __syncthreads();
+  }
+  if (TPB_ANALYSIS >= 512) {
+    if (tid < 256) {
+      s_mass[tid] += s_mass[tid + 256];
+      s_vel[tid] += s_vel[tid + 256];
+    }
+    __syncthreads();
+  }
+  if (TPB_ANALYSIS >= 256) {
+    if (tid < 128) {
+      s_mass[tid] += s_mass[tid + 128];
+      s_vel[tid] += s_vel[tid + 128];
+    }
+    __syncthreads();
+  }
+  if (TPB_ANALYSIS >= 128) {
+    if (tid < 64) {
+      s_mass[tid] += s_mass[tid + 64];
+      s_vel[tid] += s_vel[tid + 64];
+    }
+    __syncthreads();
+  }
+
+  if (tid < 32) {
+    Warp_Reduce(s_mass, tid);
+    Warp_Reduce(s_vel, tid);
+  }
+  __syncthreads();
+
+  if (tid == 0) {
+    // printf("Reduce_Tubulence_kernel 2: n = %d/%d, blockIdx.x = %d ->
+    // s_mass[0] = %.5e, s_vel[0] = %.5e\n",
+    //        n, gridDim.x, blockIdx.x, s_mass[0], s_vel[0]);
+    output_m[blockIdx.x] = s_mass[0];
+    output_v[blockIdx.x] = s_vel[0];
+  }
+}
+
+void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion_GPU(Grid3D &G)
+{
+  size_t ngrid = std::ceil((1. * G.H.nx * G.H.ny * G.H.nz) / TPB_ANALYSIS);
+
+  Real *d_partial_mass;
+  Real *d_partial_vel;
+  Real *h_partial_mass = (Real *)malloc(ngrid * sizeof(Real));
+  Real *h_partial_vel  = (Real *)malloc(ngrid * sizeof(Real));
+  GPU_Error_Check(cudaMalloc((void **)&d_partial_mass, ngrid * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc((void **)&d_partial_vel, ngrid * sizeof(Real)));
+
+  Real total_mass = 0;
+  Real total_vel  = 0;
+
+  hipLaunchKernelGGL(Reduce_Tubulence_kernel, ngrid, TPB_ANALYSIS, 0, 0, G.H.nx, G.H.ny, G.H.nz, G.H.n_ghost,
+                     G.C.d_density, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, d_circ_vel_x, d_circ_vel_y,
+                     d_partial_mass, d_partial_vel);
+
+  size_t n         = ngrid;
+  Real *mass_input = d_partial_mass;
+  Real *vel_input  = d_partial_vel;
+  while (n > TPB_ANALYSIS) {
+    ngrid = std::ceil((n * 1.) / TPB_ANALYSIS);
+    // printf("Reduce_Tubulence: Next kernel call grid size is %d\n", ngrid);
+    hipLaunchKernelGGL(Reduce_Tubulence_kernel_2, ngrid, TPB_ANALYSIS, 0, 0, mass_input, vel_input, d_partial_mass,
+                       d_partial_vel, n);
+    mass_input = d_partial_mass;
+    vel_input  = d_partial_vel;
+    n          = ngrid;
+  }
+
+  if (n > 1) {
+    hipLaunchKernelGGL(Reduce_Tubulence_kernel_2, 1, TPB_ANALYSIS, 0, 0, d_partial_mass, d_partial_vel, d_partial_mass,
+                       d_partial_vel, n);
+  }
+
+  // cudaDeviceSynchronize();
+
+  GPU_Error_Check(cudaMemcpy(h_partial_mass, d_partial_mass, ngrid * sizeof(Real), cudaMemcpyDeviceToHost));
+  GPU_Error_Check(cudaMemcpy(h_partial_vel, d_partial_vel, ngrid * sizeof(Real), cudaMemcpyDeviceToHost));
+
+  #ifdef MPI_CHOLLA
+  MPI_Allreduce(h_partial_mass, &total_mass, 1, MPI_CHREAL, MPI_SUM, world);
+  MPI_Allreduce(h_partial_vel, &total_vel, 1, MPI_CHREAL, MPI_SUM, world);
+  #else
+  total_mass = h_partial_mass[0];
+  total_vel  = h_partial_vel[0];
+  #endif
+
+  if (total_vel < 0 || total_mass < 0) {
+    chprintf("feedback trouble.  total_vel = %.3e, total_mass = %.3e\n", total_vel, total_mass);
+  }
+
+  chprintf("feedback: time %f, dt=%f, vrms = %f km/s\n", G.H.t, G.H.dt,
+           sqrt(total_vel / total_mass) * VELOCITY_UNIT / 1e5);
+
+  GPU_Error_Check(cudaFree(d_partial_vel));
+  GPU_Error_Check(cudaFree(d_partial_mass));
+
+  free(h_partial_mass);
+  free(h_partial_vel);
+}
+
+#endif  // PARTICLES_GPU
diff --git a/src/analysis/io_analysis.cpp b/src/analysis/io_analysis.cpp
index 3f0141c05..962503dea 100644
--- a/src/analysis/io_analysis.cpp
+++ b/src/analysis/io_analysis.cpp
@@ -1,18 +1,17 @@
 #ifdef ANALYSIS
 
-#include <iostream>
-#include <fstream>
-#include "../analysis/analysis.h"
-#include "../io/io.h"
-#include "../grid/grid3D.h"
+  #include <fstream>
+  #include <iostream>
 
-using namespace std;
+  #include "../analysis/analysis.h"
+  #include "../grid/grid3D.h"
+  #include "../io/io.h"
 
 // #define OUTPUT_SKEWERS_TRANSMITTED_FLUX
 
-#ifdef OUTPUT_SKEWERS
-void Grid3D::Output_Skewers_File( struct parameters *P ){
-  
+  #ifdef OUTPUT_SKEWERS
+void Grid3D::Output_Skewers_File(struct Parameters *P)
+{
   FILE *out;
   char filename[180];
   char timestep[20];
@@ -20,439 +19,451 @@ void Grid3D::Output_Skewers_File( struct parameters *P ){
   // create the filename
   strcpy(filename, P->skewersdir);
   sprintf(timestep, "%d", Analysis.n_file);
-  strcat(filename,timestep);
+  strcat(filename, timestep);
   // a binary file is created for each process
   // only one HDF5 file is created
-  strcat(filename,"_skewers");
-  strcat(filename,".h5");
-  
-  
+  strcat(filename, "_skewers");
+  strcat(filename, ".h5");
+
   chprintf("Writing Skewers File:  %d   ", Analysis.n_file);
-  
-  hid_t   file_id;
-  herr_t  status;
-  
+
+  hid_t file_id;
+  herr_t status;
+
   // Create a new file collectively
   file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
-  Write_Skewers_Header_HDF5( file_id );
-  Write_Skewers_Data_HDF5( file_id );
-  
+  Write_Skewers_Header_HDF5(file_id);
+  Write_Skewers_Data_HDF5(file_id);
+
   // Close the file
   status = H5Fclose(file_id);
-  
-  chprintf("Saved Skewers File.\n");  
-  
-}
 
+  chprintf("Saved Skewers File.\n");
+}
 
+void Grid3D::Write_Skewers_Header_HDF5(hid_t file_id)
+{
+  hid_t attribute_id, dataspace_id;
+  herr_t status;
+  hsize_t attr_dims;
+  int int_data[3];
+  Real Real_data[3];
 
-void Grid3D::Write_Skewers_Header_HDF5( hid_t file_id ){
-  hid_t     attribute_id, dataspace_id;
-  herr_t    status;
-  hsize_t   attr_dims;
-  int       int_data[3];
-  Real      Real_data[3];
-  
-  Real H0 = Cosmo.cosmo_h*100;
+  Real H0 = Cosmo.cosmo_h * 100;
 
   // Single attributes first
   attr_dims = 1;
   // Create the data space for the attribute
   dataspace_id = H5Screate_simple(1, &attr_dims, NULL);
-  #ifdef COSMOLOGY
+    #ifdef COSMOLOGY
   attribute_id = H5Acreate(file_id, "current_a", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_a);
-  status = H5Aclose(attribute_id);
+  status       = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_a);
+  status       = H5Aclose(attribute_id);
   attribute_id = H5Acreate(file_id, "current_z", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_z);
-  status = H5Aclose(attribute_id);
+  status       = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_z);
+  status       = H5Aclose(attribute_id);
   attribute_id = H5Acreate(file_id, "H0", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &H0);
-  status = H5Aclose(attribute_id);
+  status       = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &H0);
+  status       = H5Aclose(attribute_id);
   attribute_id = H5Acreate(file_id, "Omega_M", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_M);
-  status = H5Aclose(attribute_id);
+  status       = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_M);
+  status       = H5Aclose(attribute_id);
   attribute_id = H5Acreate(file_id, "Omega_L", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_L);
-  status = H5Aclose(attribute_id);
+  status       = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_L);
+  status       = H5Aclose(attribute_id);
   attribute_id = H5Acreate(file_id, "Omega_b", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_b);
-  status = H5Aclose(attribute_id);
-  #endif
-     
+  status       = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_b);
+  status       = H5Aclose(attribute_id);
+    #endif
+
   status = H5Sclose(dataspace_id);
-  
+
   // 3D atributes now
   attr_dims = 3;
   // Create the data space for the attribute
   dataspace_id = H5Screate_simple(1, &attr_dims, NULL);
-  
+
   Real_data[0] = Analysis.Lbox_x;
   Real_data[1] = Analysis.Lbox_y;
   Real_data[2] = Analysis.Lbox_z;
-  
-  attribute_id = H5Acreate(file_id, "Lbox", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); 
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data);
-  status = H5Aclose(attribute_id);
-  
-  status = H5Sclose(dataspace_id);
-  
-}
-
-
 
+  attribute_id = H5Acreate(file_id, "Lbox", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
+  status       = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data);
+  status       = H5Aclose(attribute_id);
 
-void Grid3D::Write_Skewers_Data_HDF5( hid_t file_id ){
+  status = H5Sclose(dataspace_id);
+}
 
+void Grid3D::Write_Skewers_Data_HDF5(hid_t file_id)
+{
   int n_global_x, n_global_y, n_global_z;
   int n_los_x, n_los_y, n_los_z;
   n_global_x = Analysis.n_skewers_processed_x;
   n_global_y = Analysis.n_skewers_processed_y;
   n_global_z = Analysis.n_skewers_processed_z;
-  n_los_x = Analysis.nx_total;
-  n_los_y = Analysis.ny_total;
-  n_los_z = Analysis.nz_total;
-  
+  n_los_x    = Analysis.nx_total;
+  n_los_y    = Analysis.ny_total;
+  n_los_z    = Analysis.nz_total;
+
   Real *dataset_buffer_x;
   Real *dataset_buffer_y;
   Real *dataset_buffer_z;
-  
+
   int data_id, buffer_id;
-  
-  herr_t    status;
-  hid_t     dataset_id;
-  
-  //Write Skerwes X
-  dataset_buffer_x = (Real *) malloc(n_global_x*n_los_x*sizeof(Real));
-  hsize_t  dims_x[2];
+
+  herr_t status;
+  hid_t dataset_id;
+
+  // Write Skerwes X
+  dataset_buffer_x = (Real *)malloc(n_global_x * n_los_x * sizeof(Real));
+  hsize_t dims_x[2];
   dims_x[0] = n_global_x;
   dims_x[1] = n_los_x;
   hid_t skewers_group_x, dataspace_id_skewers_x;
   dataspace_id_skewers_x = H5Screate_simple(2, dims_x, NULL);
   skewers_group_x        = H5Gcreate(file_id, "skewers_x", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
 
-  for ( int skewer_id=0; skewer_id<n_global_x; skewer_id++ ){
-    for ( int los_id=0; los_id<n_los_x; los_id ++ ){
-      data_id   = skewer_id * n_los_x + los_id; 
-      buffer_id = skewer_id * n_los_x + los_id;
+  for (int skewer_id = 0; skewer_id < n_global_x; skewer_id++) {
+    for (int los_id = 0; los_id < n_los_x; los_id++) {
+      data_id                     = skewer_id * n_los_x + los_id;
+      buffer_id                   = skewer_id * n_los_x + los_id;
       dataset_buffer_x[buffer_id] = Analysis.skewers_density_x_global[data_id];
     }
   }
-  dataset_id = H5Dcreate(skewers_group_x, "density", H5T_IEEE_F64BE, dataspace_id_skewers_x, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_x );
-  status = H5Dclose(dataset_id);
-  
-  
-  for ( int skewer_id=0; skewer_id<n_global_x; skewer_id++ ){
-    for ( int los_id=0; los_id<n_los_x; los_id ++ ){
-      data_id   = skewer_id * n_los_x + los_id; 
-      buffer_id = skewer_id * n_los_x + los_id;
+  dataset_id = H5Dcreate(skewers_group_x, "density", H5T_IEEE_F64BE, dataspace_id_skewers_x, H5P_DEFAULT, H5P_DEFAULT,
+                         H5P_DEFAULT);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_x);
+  status     = H5Dclose(dataset_id);
+
+  for (int skewer_id = 0; skewer_id < n_global_x; skewer_id++) {
+    for (int los_id = 0; los_id < n_los_x; los_id++) {
+      data_id                     = skewer_id * n_los_x + los_id;
+      buffer_id                   = skewer_id * n_los_x + los_id;
       dataset_buffer_x[buffer_id] = Analysis.skewers_HI_density_x_global[data_id];
     }
   }
-  dataset_id = H5Dcreate(skewers_group_x, "HI_density", H5T_IEEE_F64BE, dataspace_id_skewers_x, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_x );
-  status = H5Dclose(dataset_id);
-  
-  for ( int skewer_id=0; skewer_id<n_global_x; skewer_id++ ){
-    for ( int los_id=0; los_id<n_los_x; los_id ++ ){
-      data_id   = skewer_id * n_los_x + los_id; 
-      buffer_id = skewer_id * n_los_x + los_id;
+  dataset_id = H5Dcreate(skewers_group_x, "HI_density", H5T_IEEE_F64BE, dataspace_id_skewers_x, H5P_DEFAULT,
+                         H5P_DEFAULT, H5P_DEFAULT);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_x);
+  status     = H5Dclose(dataset_id);
+
+  for (int skewer_id = 0; skewer_id < n_global_x; skewer_id++) {
+    for (int los_id = 0; los_id < n_los_x; los_id++) {
+      data_id                     = skewer_id * n_los_x + los_id;
+      buffer_id                   = skewer_id * n_los_x + los_id;
       dataset_buffer_x[buffer_id] = Analysis.skewers_HeII_density_x_global[data_id];
     }
   }
-  dataset_id = H5Dcreate(skewers_group_x, "HeII_density", H5T_IEEE_F64BE, dataspace_id_skewers_x, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_x );
-  status = H5Dclose(dataset_id);
-  
-  for ( int skewer_id=0; skewer_id<n_global_x; skewer_id++ ){
-    for ( int los_id=0; los_id<n_los_x; los_id ++ ){
-      data_id   = skewer_id * n_los_x + los_id; 
-      buffer_id = skewer_id * n_los_x + los_id;
+  dataset_id = H5Dcreate(skewers_group_x, "HeII_density", H5T_IEEE_F64BE, dataspace_id_skewers_x, H5P_DEFAULT,
+                         H5P_DEFAULT, H5P_DEFAULT);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_x);
+  status     = H5Dclose(dataset_id);
+
+  for (int skewer_id = 0; skewer_id < n_global_x; skewer_id++) {
+    for (int los_id = 0; los_id < n_los_x; los_id++) {
+      data_id                     = skewer_id * n_los_x + los_id;
+      buffer_id                   = skewer_id * n_los_x + los_id;
       dataset_buffer_x[buffer_id] = Analysis.skewers_temperature_x_global[data_id];
     }
   }
-  dataset_id = H5Dcreate(skewers_group_x, "temperature", H5T_IEEE_F64BE, dataspace_id_skewers_x, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_x );
-  status = H5Dclose(dataset_id);
-  
-  for ( int skewer_id=0; skewer_id<n_global_x; skewer_id++ ){
-    for ( int los_id=0; los_id<n_los_x; los_id ++ ){
-      data_id   = skewer_id * n_los_x + los_id; 
-      buffer_id = skewer_id * n_los_x + los_id;
+  dataset_id = H5Dcreate(skewers_group_x, "temperature", H5T_IEEE_F64BE, dataspace_id_skewers_x, H5P_DEFAULT,
+                         H5P_DEFAULT, H5P_DEFAULT);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_x);
+  status     = H5Dclose(dataset_id);
+
+  for (int skewer_id = 0; skewer_id < n_global_x; skewer_id++) {
+    for (int los_id = 0; los_id < n_los_x; los_id++) {
+      data_id                     = skewer_id * n_los_x + los_id;
+      buffer_id                   = skewer_id * n_los_x + los_id;
       dataset_buffer_x[buffer_id] = Analysis.skewers_los_velocity_x_global[data_id];
     }
   }
-  dataset_id = H5Dcreate(skewers_group_x, "los_velocity", H5T_IEEE_F64BE, dataspace_id_skewers_x, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_x );
-  status = H5Dclose(dataset_id);
-  
-  #ifdef OUTPUT_SKEWERS_TRANSMITTED_FLUX
-  for ( int skewer_id=0; skewer_id<n_global_x; skewer_id++ ){
-    for ( int los_id=0; los_id<n_los_x; los_id ++ ){
-      data_id   = skewer_id * n_los_x + los_id; 
-      buffer_id = skewer_id * n_los_x + los_id;
+  dataset_id = H5Dcreate(skewers_group_x, "los_velocity", H5T_IEEE_F64BE, dataspace_id_skewers_x, H5P_DEFAULT,
+                         H5P_DEFAULT, H5P_DEFAULT);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_x);
+  status     = H5Dclose(dataset_id);
+
+    #ifdef OUTPUT_SKEWERS_TRANSMITTED_FLUX
+  for (int skewer_id = 0; skewer_id < n_global_x; skewer_id++) {
+    for (int los_id = 0; los_id < n_los_x; los_id++) {
+      data_id                     = skewer_id * n_los_x + los_id;
+      buffer_id                   = skewer_id * n_los_x + los_id;
       dataset_buffer_x[buffer_id] = Analysis.skewers_transmitted_flux_HI_x_global[data_id];
     }
   }
-  dataset_id = H5Dcreate(skewers_group_x, "los_transmitted_flux_HI", H5T_IEEE_F64BE, dataspace_id_skewers_x, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_x );
-  status = H5Dclose(dataset_id);
-  
-  for ( int skewer_id=0; skewer_id<n_global_x; skewer_id++ ){
-    for ( int los_id=0; los_id<n_los_x; los_id ++ ){
-      data_id   = skewer_id * n_los_x + los_id;
-      buffer_id = skewer_id * n_los_x + los_id; 
+  dataset_id = H5Dcreate(skewers_group_x, "los_transmitted_flux_HI", H5T_IEEE_F64BE, dataspace_id_skewers_x,
+                         H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_x);
+  status     = H5Dclose(dataset_id);
+
+  for (int skewer_id = 0; skewer_id < n_global_x; skewer_id++) {
+    for (int los_id = 0; los_id < n_los_x; los_id++) {
+      data_id                     = skewer_id * n_los_x + los_id;
+      buffer_id                   = skewer_id * n_los_x + los_id;
       dataset_buffer_x[buffer_id] = Analysis.skewers_transmitted_flux_HeII_x_global[data_id];
     }
   }
-  dataset_id = H5Dcreate(skewers_group_x, "los_transmitted_flux_HeII", H5T_IEEE_F64BE, dataspace_id_skewers_x, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_x );
-  status = H5Dclose(dataset_id);
-  #endif 
-  
-  free( dataset_buffer_x );
-  
-  
-  //Write Skerwes Y 
-  dataset_buffer_y = (Real *) malloc(n_global_y*n_los_y*sizeof(Real));
-  hsize_t  dims_y[2];
+  dataset_id = H5Dcreate(skewers_group_x, "los_transmitted_flux_HeII", H5T_IEEE_F64BE, dataspace_id_skewers_x,
+                         H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_x);
+  status     = H5Dclose(dataset_id);
+    #endif
+
+  free(dataset_buffer_x);
+
+  // Write Skerwes Y
+  dataset_buffer_y = (Real *)malloc(n_global_y * n_los_y * sizeof(Real));
+  hsize_t dims_y[2];
   dims_y[0] = n_global_y;
   dims_y[1] = n_los_y;
   hid_t skewers_group_y, dataspace_id_skewers_y;
   dataspace_id_skewers_y = H5Screate_simple(2, dims_y, NULL);
   skewers_group_y        = H5Gcreate(file_id, "skewers_y", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
 
-  for ( int skewer_id=0; skewer_id<n_global_y; skewer_id++ ){
-    for ( int los_id=0; los_id<n_los_y; los_id ++ ){
-      data_id   = skewer_id * n_los_y + los_id;
-      buffer_id = skewer_id * n_los_y + los_id; 
+  for (int skewer_id = 0; skewer_id < n_global_y; skewer_id++) {
+    for (int los_id = 0; los_id < n_los_y; los_id++) {
+      data_id                     = skewer_id * n_los_y + los_id;
+      buffer_id                   = skewer_id * n_los_y + los_id;
       dataset_buffer_y[buffer_id] = Analysis.skewers_density_y_global[data_id];
     }
   }
-  dataset_id = H5Dcreate(skewers_group_y, "density", H5T_IEEE_F64BE, dataspace_id_skewers_y, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_y );
-  status = H5Dclose(dataset_id);
-    
-  for ( int skewer_id=0; skewer_id<n_global_y; skewer_id++ ){
-    for ( int los_id=0; los_id<n_los_y; los_id ++ ){
-      data_id   = skewer_id * n_los_y + los_id;
-      buffer_id = skewer_id * n_los_y + los_id; 
+  dataset_id = H5Dcreate(skewers_group_y, "density", H5T_IEEE_F64BE, dataspace_id_skewers_y, H5P_DEFAULT, H5P_DEFAULT,
+                         H5P_DEFAULT);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_y);
+  status     = H5Dclose(dataset_id);
+
+  for (int skewer_id = 0; skewer_id < n_global_y; skewer_id++) {
+    for (int los_id = 0; los_id < n_los_y; los_id++) {
+      data_id                     = skewer_id * n_los_y + los_id;
+      buffer_id                   = skewer_id * n_los_y + los_id;
       dataset_buffer_y[buffer_id] = Analysis.skewers_HI_density_y_global[data_id];
     }
   }
-  dataset_id = H5Dcreate(skewers_group_y, "HI_density", H5T_IEEE_F64BE, dataspace_id_skewers_y, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_y );
-  status = H5Dclose(dataset_id);
-    
-  for ( int skewer_id=0; skewer_id<n_global_y; skewer_id++ ){
-    for ( int los_id=0; los_id<n_los_y; los_id ++ ){
-      data_id   = skewer_id * n_los_y + los_id;
-      buffer_id = skewer_id * n_los_y + los_id; 
+  dataset_id = H5Dcreate(skewers_group_y, "HI_density", H5T_IEEE_F64BE, dataspace_id_skewers_y, H5P_DEFAULT,
+                         H5P_DEFAULT, H5P_DEFAULT);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_y);
+  status     = H5Dclose(dataset_id);
+
+  for (int skewer_id = 0; skewer_id < n_global_y; skewer_id++) {
+    for (int los_id = 0; los_id < n_los_y; los_id++) {
+      data_id                     = skewer_id * n_los_y + los_id;
+      buffer_id                   = skewer_id * n_los_y + los_id;
       dataset_buffer_y[buffer_id] = Analysis.skewers_HeII_density_y_global[data_id];
     }
   }
-  dataset_id = H5Dcreate(skewers_group_y, "HeII_density", H5T_IEEE_F64BE, dataspace_id_skewers_y, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_y );
-  status = H5Dclose(dataset_id);
-    
-  for ( int skewer_id=0; skewer_id<n_global_y; skewer_id++ ){
-    for ( int los_id=0; los_id<n_los_y; los_id ++ ){
-      data_id   = skewer_id * n_los_y + los_id;
-      buffer_id = skewer_id * n_los_y + los_id; 
+  dataset_id = H5Dcreate(skewers_group_y, "HeII_density", H5T_IEEE_F64BE, dataspace_id_skewers_y, H5P_DEFAULT,
+                         H5P_DEFAULT, H5P_DEFAULT);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_y);
+  status     = H5Dclose(dataset_id);
+
+  for (int skewer_id = 0; skewer_id < n_global_y; skewer_id++) {
+    for (int los_id = 0; los_id < n_los_y; los_id++) {
+      data_id                     = skewer_id * n_los_y + los_id;
+      buffer_id                   = skewer_id * n_los_y + los_id;
       dataset_buffer_y[buffer_id] = Analysis.skewers_temperature_y_global[data_id];
     }
   }
-  dataset_id = H5Dcreate(skewers_group_y, "temperature", H5T_IEEE_F64BE, dataspace_id_skewers_y, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_y );
-  status = H5Dclose(dataset_id);
-
-  for ( int skewer_id=0; skewer_id<n_global_y; skewer_id++ ){
-    for ( int los_id=0; los_id<n_los_y; los_id ++ ){
-      data_id   = skewer_id * n_los_y + los_id;
-      buffer_id = skewer_id * n_los_y + los_id; 
+  dataset_id = H5Dcreate(skewers_group_y, "temperature", H5T_IEEE_F64BE, dataspace_id_skewers_y, H5P_DEFAULT,
+                         H5P_DEFAULT, H5P_DEFAULT);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_y);
+  status     = H5Dclose(dataset_id);
+
+  for (int skewer_id = 0; skewer_id < n_global_y; skewer_id++) {
+    for (int los_id = 0; los_id < n_los_y; los_id++) {
+      data_id                     = skewer_id * n_los_y + los_id;
+      buffer_id                   = skewer_id * n_los_y + los_id;
       dataset_buffer_y[buffer_id] = Analysis.skewers_los_velocity_y_global[data_id];
     }
   }
-  dataset_id = H5Dcreate(skewers_group_y, "los_velocity", H5T_IEEE_F64BE, dataspace_id_skewers_y, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_y );
-  status = H5Dclose(dataset_id);
-  
-  #ifdef OUTPUT_SKEWERS_TRANSMITTED_FLUX
-  for ( int skewer_id=0; skewer_id<n_global_y; skewer_id++ ){
-    for ( int los_id=0; los_id<n_los_y; los_id ++ ){
-      data_id   = skewer_id * n_los_y + los_id;
-      buffer_id = skewer_id * n_los_y + los_id; 
+  dataset_id = H5Dcreate(skewers_group_y, "los_velocity", H5T_IEEE_F64BE, dataspace_id_skewers_y, H5P_DEFAULT,
+                         H5P_DEFAULT, H5P_DEFAULT);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_y);
+  status     = H5Dclose(dataset_id);
+
+    #ifdef OUTPUT_SKEWERS_TRANSMITTED_FLUX
+  for (int skewer_id = 0; skewer_id < n_global_y; skewer_id++) {
+    for (int los_id = 0; los_id < n_los_y; los_id++) {
+      data_id                     = skewer_id * n_los_y + los_id;
+      buffer_id                   = skewer_id * n_los_y + los_id;
       dataset_buffer_y[buffer_id] = Analysis.skewers_transmitted_flux_HI_y_global[data_id];
     }
   }
-  dataset_id = H5Dcreate(skewers_group_y, "los_transmitted_flux_HI", H5T_IEEE_F64BE, dataspace_id_skewers_y, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_y );
-  status = H5Dclose(dataset_id);
-  
-  for ( int skewer_id=0; skewer_id<n_global_y; skewer_id++ ){
-    for ( int los_id=0; los_id<n_los_y; los_id ++ ){
-      data_id   = skewer_id * n_los_y + los_id; 
-      buffer_id = skewer_id * n_los_y + los_id;
+  dataset_id = H5Dcreate(skewers_group_y, "los_transmitted_flux_HI", H5T_IEEE_F64BE, dataspace_id_skewers_y,
+                         H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_y);
+  status     = H5Dclose(dataset_id);
+
+  for (int skewer_id = 0; skewer_id < n_global_y; skewer_id++) {
+    for (int los_id = 0; los_id < n_los_y; los_id++) {
+      data_id                     = skewer_id * n_los_y + los_id;
+      buffer_id                   = skewer_id * n_los_y + los_id;
       dataset_buffer_y[buffer_id] = Analysis.skewers_transmitted_flux_HeII_y_global[data_id];
     }
   }
-  dataset_id = H5Dcreate(skewers_group_y, "los_transmitted_flux_HeII", H5T_IEEE_F64BE, dataspace_id_skewers_y, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_y );
-  status = H5Dclose(dataset_id);
-  #endif
-  
-  free( dataset_buffer_y );
-  
-  //Write Skerwes Z 
-  dataset_buffer_z = (Real *) malloc(n_global_z*n_los_z*sizeof(Real));
-  hsize_t  dims_z[2];
+  dataset_id = H5Dcreate(skewers_group_y, "los_transmitted_flux_HeII", H5T_IEEE_F64BE, dataspace_id_skewers_y,
+                         H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_y);
+  status     = H5Dclose(dataset_id);
+    #endif
+
+  free(dataset_buffer_y);
+
+  // Write Skerwes Z
+  dataset_buffer_z = (Real *)malloc(n_global_z * n_los_z * sizeof(Real));
+  hsize_t dims_z[2];
   dims_z[0] = n_global_z;
   dims_z[1] = n_los_z;
   hid_t skewers_group_z, dataspace_id_skewers_z;
   dataspace_id_skewers_z = H5Screate_simple(2, dims_z, NULL);
   skewers_group_z        = H5Gcreate(file_id, "skewers_z", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  
-  for ( int skewer_id=0; skewer_id<n_global_z; skewer_id++ ){
-    for ( int los_id=0; los_id<n_los_z; los_id ++ ){
-      data_id   = skewer_id * n_los_z + los_id; 
-      buffer_id = skewer_id * n_los_z + los_id;
+
+  for (int skewer_id = 0; skewer_id < n_global_z; skewer_id++) {
+    for (int los_id = 0; los_id < n_los_z; los_id++) {
+      data_id                     = skewer_id * n_los_z + los_id;
+      buffer_id                   = skewer_id * n_los_z + los_id;
       dataset_buffer_z[buffer_id] = Analysis.skewers_density_z_global[data_id];
     }
   }
-  dataset_id = H5Dcreate(skewers_group_z, "density", H5T_IEEE_F64BE, dataspace_id_skewers_z, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_z );
-  status = H5Dclose(dataset_id);
-
-  for ( int skewer_id=0; skewer_id<n_global_z; skewer_id++ ){
-    for ( int los_id=0; los_id<n_los_z; los_id ++ ){
-      data_id   = skewer_id * n_los_z + los_id; 
-      buffer_id = skewer_id * n_los_z + los_id;
+  dataset_id = H5Dcreate(skewers_group_z, "density", H5T_IEEE_F64BE, dataspace_id_skewers_z, H5P_DEFAULT, H5P_DEFAULT,
+                         H5P_DEFAULT);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_z);
+  status     = H5Dclose(dataset_id);
+
+  for (int skewer_id = 0; skewer_id < n_global_z; skewer_id++) {
+    for (int los_id = 0; los_id < n_los_z; los_id++) {
+      data_id                     = skewer_id * n_los_z + los_id;
+      buffer_id                   = skewer_id * n_los_z + los_id;
       dataset_buffer_z[buffer_id] = Analysis.skewers_HI_density_z_global[data_id];
     }
   }
-  dataset_id = H5Dcreate(skewers_group_z, "HI_density", H5T_IEEE_F64BE, dataspace_id_skewers_z, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_z );
-  status = H5Dclose(dataset_id);
-  
-  for ( int skewer_id=0; skewer_id<n_global_z; skewer_id++ ){
-    for ( int los_id=0; los_id<n_los_z; los_id ++ ){
-      data_id   = skewer_id * n_los_z + los_id; 
-      buffer_id = skewer_id * n_los_z + los_id;
+  dataset_id = H5Dcreate(skewers_group_z, "HI_density", H5T_IEEE_F64BE, dataspace_id_skewers_z, H5P_DEFAULT,
+                         H5P_DEFAULT, H5P_DEFAULT);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_z);
+  status     = H5Dclose(dataset_id);
+
+  for (int skewer_id = 0; skewer_id < n_global_z; skewer_id++) {
+    for (int los_id = 0; los_id < n_los_z; los_id++) {
+      data_id                     = skewer_id * n_los_z + los_id;
+      buffer_id                   = skewer_id * n_los_z + los_id;
       dataset_buffer_z[buffer_id] = Analysis.skewers_HeII_density_z_global[data_id];
     }
   }
-  dataset_id = H5Dcreate(skewers_group_z, "HeII_density", H5T_IEEE_F64BE, dataspace_id_skewers_z, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_z );
-  status = H5Dclose(dataset_id);
-
-  for ( int skewer_id=0; skewer_id<n_global_z; skewer_id++ ){
-    for ( int los_id=0; los_id<n_los_z; los_id ++ ){
-      data_id   = skewer_id * n_los_z + los_id; 
-      buffer_id = skewer_id * n_los_z + los_id;
+  dataset_id = H5Dcreate(skewers_group_z, "HeII_density", H5T_IEEE_F64BE, dataspace_id_skewers_z, H5P_DEFAULT,
+                         H5P_DEFAULT, H5P_DEFAULT);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_z);
+  status     = H5Dclose(dataset_id);
+
+  for (int skewer_id = 0; skewer_id < n_global_z; skewer_id++) {
+    for (int los_id = 0; los_id < n_los_z; los_id++) {
+      data_id                     = skewer_id * n_los_z + los_id;
+      buffer_id                   = skewer_id * n_los_z + los_id;
       dataset_buffer_z[buffer_id] = Analysis.skewers_temperature_z_global[data_id];
     }
   }
-  dataset_id = H5Dcreate(skewers_group_z, "temperature", H5T_IEEE_F64BE, dataspace_id_skewers_z, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_z );
-  status = H5Dclose(dataset_id);
-  
-  for ( int skewer_id=0; skewer_id<n_global_z; skewer_id++ ){
-    for ( int los_id=0; los_id<n_los_z; los_id ++ ){
-      data_id   = skewer_id * n_los_z + los_id; 
-      buffer_id = skewer_id * n_los_z + los_id;
+  dataset_id = H5Dcreate(skewers_group_z, "temperature", H5T_IEEE_F64BE, dataspace_id_skewers_z, H5P_DEFAULT,
+                         H5P_DEFAULT, H5P_DEFAULT);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_z);
+  status     = H5Dclose(dataset_id);
+
+  for (int skewer_id = 0; skewer_id < n_global_z; skewer_id++) {
+    for (int los_id = 0; los_id < n_los_z; los_id++) {
+      data_id                     = skewer_id * n_los_z + los_id;
+      buffer_id                   = skewer_id * n_los_z + los_id;
       dataset_buffer_z[buffer_id] = Analysis.skewers_los_velocity_z_global[data_id];
     }
   }
-  dataset_id = H5Dcreate(skewers_group_z, "los_velocity", H5T_IEEE_F64BE, dataspace_id_skewers_z, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_z );
-  status = H5Dclose(dataset_id);
-  
-  #ifdef OUTPUT_SKEWERS_TRANSMITTED_FLUX
-  for ( int skewer_id=0; skewer_id<n_global_z; skewer_id++ ){
-    for ( int los_id=0; los_id<n_los_z; los_id ++ ){
-      data_id   = skewer_id * n_los_z + los_id; 
-      buffer_id = skewer_id * n_los_z + los_id;
+  dataset_id = H5Dcreate(skewers_group_z, "los_velocity", H5T_IEEE_F64BE, dataspace_id_skewers_z, H5P_DEFAULT,
+                         H5P_DEFAULT, H5P_DEFAULT);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_z);
+  status     = H5Dclose(dataset_id);
+
+    #ifdef OUTPUT_SKEWERS_TRANSMITTED_FLUX
+  for (int skewer_id = 0; skewer_id < n_global_z; skewer_id++) {
+    for (int los_id = 0; los_id < n_los_z; los_id++) {
+      data_id                     = skewer_id * n_los_z + los_id;
+      buffer_id                   = skewer_id * n_los_z + los_id;
       dataset_buffer_z[buffer_id] = Analysis.skewers_transmitted_flux_HI_z_global[data_id];
     }
   }
-  dataset_id = H5Dcreate(skewers_group_z, "los_transmitted_flux_HI", H5T_IEEE_F64BE, dataspace_id_skewers_z, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_z );
-  status = H5Dclose(dataset_id);
-  
-  for ( int skewer_id=0; skewer_id<n_global_z; skewer_id++ ){
-    for ( int los_id=0; los_id<n_los_z; los_id ++ ){
-      data_id   = skewer_id * n_los_z + los_id;
-      buffer_id = skewer_id * n_los_z + los_id; 
+  dataset_id = H5Dcreate(skewers_group_z, "los_transmitted_flux_HI", H5T_IEEE_F64BE, dataspace_id_skewers_z,
+                         H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_z);
+  status     = H5Dclose(dataset_id);
+
+  for (int skewer_id = 0; skewer_id < n_global_z; skewer_id++) {
+    for (int los_id = 0; los_id < n_los_z; los_id++) {
+      data_id                     = skewer_id * n_los_z + los_id;
+      buffer_id                   = skewer_id * n_los_z + los_id;
       dataset_buffer_z[buffer_id] = Analysis.skewers_transmitted_flux_HeII_z_global[data_id];
     }
   }
-  dataset_id = H5Dcreate(skewers_group_z, "los_transmitted_flux_HeII", H5T_IEEE_F64BE, dataspace_id_skewers_z, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_z );
-  status = H5Dclose(dataset_id);
-  #endif
-  
-  free( dataset_buffer_z );
+  dataset_id = H5Dcreate(skewers_group_z, "los_transmitted_flux_HeII", H5T_IEEE_F64BE, dataspace_id_skewers_z,
+                         H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_z);
+  status     = H5Dclose(dataset_id);
+    #endif
+
+  free(dataset_buffer_z);
 
   int n_ghost;
   n_ghost = Analysis.n_ghost_skewer;
-  
+
   hid_t dataspace_id_skewer_x;
-  hsize_t   dims1d_x[1];
-  dims1d_x[0] = n_los_x;
+  hsize_t dims1d_x[1];
+  dims1d_x[0]           = n_los_x;
   dataspace_id_skewer_x = H5Screate_simple(1, dims1d_x, NULL);
-  Real *buffer_skewer_x = (Real *) malloc(n_los_x*sizeof(Real));
-  for ( int los_id=0; los_id<n_los_x; los_id++ ){
-    buffer_skewer_x[los_id] = Analysis.full_vel_Hubble_x[los_id+n_ghost] / 1e5 ; //km/s
+  Real *buffer_skewer_x = (Real *)malloc(n_los_x * sizeof(Real));
+  for (int los_id = 0; los_id < n_los_x; los_id++) {
+    buffer_skewer_x[los_id] = Analysis.full_vel_Hubble_x[los_id + n_ghost] / 1e5;  // km/s
   }
-  dataset_id = H5Dcreate(skewers_group_x, "vel_Hubble", H5T_IEEE_F64BE, dataspace_id_skewer_x, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer_skewer_x);
-  status = H5Dclose(dataset_id);
-    
+  dataset_id = H5Dcreate(skewers_group_x, "vel_Hubble", H5T_IEEE_F64BE, dataspace_id_skewer_x, H5P_DEFAULT, H5P_DEFAULT,
+                         H5P_DEFAULT);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer_skewer_x);
+  status     = H5Dclose(dataset_id);
+
   hid_t dataspace_id_skewer_y;
-  hsize_t   dims1d_y[1];
-  dims1d_y[0] = n_los_y;
+  hsize_t dims1d_y[1];
+  dims1d_y[0]           = n_los_y;
   dataspace_id_skewer_y = H5Screate_simple(1, dims1d_y, NULL);
-  Real *buffer_skewer_y = (Real *) malloc(n_los_y*sizeof(Real));
-  for ( int los_id=0; los_id<n_los_y; los_id++ ){
-    buffer_skewer_y[los_id] = Analysis.full_vel_Hubble_y[los_id+n_ghost] / 1e5 ; //km/s
+  Real *buffer_skewer_y = (Real *)malloc(n_los_y * sizeof(Real));
+  for (int los_id = 0; los_id < n_los_y; los_id++) {
+    buffer_skewer_y[los_id] = Analysis.full_vel_Hubble_y[los_id + n_ghost] / 1e5;  // km/s
   }
-  dataset_id = H5Dcreate(skewers_group_y, "vel_Hubble", H5T_IEEE_F64BE, dataspace_id_skewer_y, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer_skewer_y);
-  status = H5Dclose(dataset_id);
+  dataset_id = H5Dcreate(skewers_group_y, "vel_Hubble", H5T_IEEE_F64BE, dataspace_id_skewer_y, H5P_DEFAULT, H5P_DEFAULT,
+                         H5P_DEFAULT);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer_skewer_y);
+  status     = H5Dclose(dataset_id);
 
   hid_t dataspace_id_skewer_z;
-  hsize_t   dims1d_z[1];
-  dims1d_z[0] = n_los_z;
+  hsize_t dims1d_z[1];
+  dims1d_z[0]           = n_los_z;
   dataspace_id_skewer_z = H5Screate_simple(1, dims1d_z, NULL);
-  Real *buffer_skewer_z = (Real *) malloc(n_los_z*sizeof(Real));
-  for ( int los_id=0; los_id<n_los_z; los_id++ ){
-    buffer_skewer_z[los_id] = Analysis.full_vel_Hubble_z[los_id+n_ghost] / 1e5 ; //km/s
+  Real *buffer_skewer_z = (Real *)malloc(n_los_z * sizeof(Real));
+  for (int los_id = 0; los_id < n_los_z; los_id++) {
+    buffer_skewer_z[los_id] = Analysis.full_vel_Hubble_z[los_id + n_ghost] / 1e5;  // km/s
   }
-  dataset_id = H5Dcreate(skewers_group_z, "vel_Hubble", H5T_IEEE_F64BE, dataspace_id_skewer_z, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer_skewer_z);
-  status = H5Dclose(dataset_id);
-  
-  free( buffer_skewer_x );
-  free( buffer_skewer_y );
-  free( buffer_skewer_z );
+  dataset_id = H5Dcreate(skewers_group_z, "vel_Hubble", H5T_IEEE_F64BE, dataspace_id_skewer_z, H5P_DEFAULT, H5P_DEFAULT,
+                         H5P_DEFAULT);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer_skewer_z);
+  status     = H5Dclose(dataset_id);
+
+  free(buffer_skewer_x);
+  free(buffer_skewer_y);
+  free(buffer_skewer_z);
 
   status = H5Gclose(skewers_group_x);
   status = H5Gclose(skewers_group_y);
-  status = H5Gclose(skewers_group_z);  
-
-  
+  status = H5Gclose(skewers_group_z);
 }
 
-#endif//OUTPUT_SKEWERS
-
+  #endif  // OUTPUT_SKEWERS
 
-void Grid3D::Output_Analysis( struct parameters *P ){
-  
+void Grid3D::Output_Analysis(struct Parameters *P)
+{
   #ifdef OUTPUT_SKEWERS
-  Output_Skewers_File( P );
+  Output_Skewers_File(P);
   #endif
-  
+
   FILE *out;
   char filename[180];
   char timestep[20];
@@ -460,65 +471,62 @@ void Grid3D::Output_Analysis( struct parameters *P ){
   // create the filename
   strcpy(filename, P->analysisdir);
   sprintf(timestep, "%d", Analysis.n_file);
-  strcat(filename,timestep);
+  strcat(filename, timestep);
   // a binary file is created for each process
   // only one HDF5 file is created
-  strcat(filename,"_analysis");
-  strcat(filename,".h5");
-  
-  
+  strcat(filename, "_analysis");
+  strcat(filename, ".h5");
+
   chprintf("Writing Analysis File: %d   ", Analysis.n_file);
-  
-  hid_t   file_id;
-  herr_t  status;
+
+  hid_t file_id;
+  herr_t status;
 
   // Create a new file collectively
   file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
-  Write_Analysis_Header_HDF5( file_id );
-  Write_Analysis_Data_HDF5( file_id );
+  Write_Analysis_Header_HDF5(file_id);
+  Write_Analysis_Data_HDF5(file_id);
 
   // Close the file
   status = H5Fclose(file_id);
 
   chprintf("Saved Analysis File.\n\n");
-
 }
 
-
-void Grid3D::Write_Analysis_Header_HDF5( hid_t file_id ){
-  hid_t     attribute_id, dataspace_id;
-  herr_t    status;
-  hsize_t   attr_dims;
-  int       int_data[3];
-  Real      Real_data[3];
-
+void Grid3D::Write_Analysis_Header_HDF5(hid_t file_id)
+{
+  hid_t attribute_id, dataspace_id;
+  herr_t status;
+  hsize_t attr_dims;
+  int int_data[3];
+  Real Real_data[3];
 
   // Single attributes first
   attr_dims = 1;
   // Create the data space for the attribute
   dataspace_id = H5Screate_simple(1, &attr_dims, NULL);
   #ifdef COSMOLOGY
-  Real H0 = Cosmo.cosmo_h*100;
+  Real H0      = Cosmo.cosmo_h * 100;
   attribute_id = H5Acreate(file_id, "current_a", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_a);
-  status = H5Aclose(attribute_id);
+  status       = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_a);
+  status       = H5Aclose(attribute_id);
   attribute_id = H5Acreate(file_id, "current_z", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_z);
-  status = H5Aclose(attribute_id);
+  status       = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_z);
+  status       = H5Aclose(attribute_id);
   attribute_id = H5Acreate(file_id, "H0", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &H0);
-  status = H5Aclose(attribute_id);
+  status       = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &H0);
+  status       = H5Aclose(attribute_id);
   attribute_id = H5Acreate(file_id, "Omega_M", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_M);
-  status = H5Aclose(attribute_id);
+  status       = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_M);
+  status       = H5Aclose(attribute_id);
   attribute_id = H5Acreate(file_id, "Omega_L", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_L);
-  status = H5Aclose(attribute_id);
+  status       = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_L);
+  status       = H5Aclose(attribute_id);
   attribute_id = H5Acreate(file_id, "Omega_b", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_b);
-  status = H5Aclose(attribute_id);
+  status       = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_b);
+  status       = H5Aclose(attribute_id);
   #endif
-     
+
   status = H5Sclose(dataspace_id);
 
   // 3D atributes now
@@ -531,157 +539,140 @@ void Grid3D::Write_Analysis_Header_HDF5( hid_t file_id ){
   Real_data[2] = Analysis.Lbox_z;
 
   attribute_id = H5Acreate(file_id, "Lbox", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data);
-  status = H5Aclose(attribute_id);
-  
+  status       = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data);
+  status       = H5Aclose(attribute_id);
+
   status = H5Sclose(dataspace_id);
-  
 }
 
-
-
-void Grid3D::Write_Analysis_Data_HDF5( hid_t file_id ){
-
-
-  herr_t    status;
-  hid_t     dataset_id, dataspace_id, group_id, attribute_id;
-  hsize_t   dims2d[2];
-  hsize_t   attr_dims;
+void Grid3D::Write_Analysis_Data_HDF5(hid_t file_id)
+{
+  herr_t status;
+  hid_t dataset_id, dataspace_id, group_id, attribute_id;
+  hsize_t dims2d[2];
+  hsize_t attr_dims;
   int nx_dset, ny_dset, j, i, id, buf_id;
 
   #ifdef PHASE_DIAGRAM
-  nx_dset = Analysis.n_temp;
-  ny_dset = Analysis.n_dens;
-  float *dataset_buffer = (float *) malloc(nx_dset*ny_dset*sizeof(Real));
-
+  nx_dset               = Analysis.n_temp;
+  ny_dset               = Analysis.n_dens;
+  float *dataset_buffer = (float *)malloc(nx_dset * ny_dset * sizeof(Real));
 
   // Create the data space for the datasets
-  dims2d[0] = nx_dset;
-  dims2d[1] = ny_dset;
+  dims2d[0]    = nx_dset;
+  dims2d[1]    = ny_dset;
   dataspace_id = H5Screate_simple(2, dims2d, NULL);
 
   group_id = H5Gcreate(file_id, "/phase_diagram", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  for (j=0; j<ny_dset; j++) {
-    for (i=0; i<nx_dset; i++) {
-      id = i + j*nx_dset;
-      buf_id = j + i*ny_dset;
+  for (j = 0; j < ny_dset; j++) {
+    for (i = 0; i < nx_dset; i++) {
+      id                     = i + j * nx_dset;
+      buf_id                 = j + i * ny_dset;
       dataset_buffer[buf_id] = Analysis.phase_diagram[id];
     }
   }
   dataset_id = H5Dcreate(group_id, "data", H5T_IEEE_F32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-  status = H5Dclose(dataset_id);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
+  status     = H5Dclose(dataset_id);
 
-  attr_dims = 1;
+  attr_dims    = 1;
   dataspace_id = H5Screate_simple(1, &attr_dims, NULL);
   attribute_id = H5Acreate(group_id, "n_temp", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_INT, &Analysis.n_temp);
-  status = H5Aclose(attribute_id);
+  status       = H5Awrite(attribute_id, H5T_NATIVE_INT, &Analysis.n_temp);
+  status       = H5Aclose(attribute_id);
   attribute_id = H5Acreate(group_id, "n_dens", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_INT, &Analysis.n_dens);
-  status = H5Aclose(attribute_id);
+  status       = H5Awrite(attribute_id, H5T_NATIVE_INT, &Analysis.n_dens);
+  status       = H5Aclose(attribute_id);
   attribute_id = H5Acreate(group_id, "temp_min", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Analysis.temp_min);
-  status = H5Aclose(attribute_id);
+  status       = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Analysis.temp_min);
+  status       = H5Aclose(attribute_id);
   attribute_id = H5Acreate(group_id, "temp_max", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Analysis.temp_max);
-  status = H5Aclose(attribute_id);
+  status       = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Analysis.temp_max);
+  status       = H5Aclose(attribute_id);
   attribute_id = H5Acreate(group_id, "dens_min", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Analysis.dens_min);
-  status = H5Aclose(attribute_id);
+  status       = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Analysis.dens_min);
+  status       = H5Aclose(attribute_id);
   attribute_id = H5Acreate(group_id, "dens_max", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Analysis.dens_max);
-  status = H5Aclose(attribute_id);
-
+  status       = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Analysis.dens_max);
+  status       = H5Aclose(attribute_id);
 
   free(dataset_buffer);
   status = H5Gclose(group_id);
 
-  #endif//PHASE_DIAGRAM
-
+  #endif  // PHASE_DIAGRAM
 
   #ifdef LYA_STATISTICS
 
   group_id = H5Gcreate(file_id, "/lya_statistics", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
 
-
   attribute_id = H5Acreate(group_id, "n_skewers", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_INT, &Analysis.n_skewers_processed);
-  status = H5Aclose(attribute_id);
-
+  status       = H5Awrite(attribute_id, H5T_NATIVE_INT, &Analysis.n_skewers_processed);
+  status       = H5Aclose(attribute_id);
 
   attribute_id = H5Acreate(group_id, "Flux_mean_HI", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Analysis.Flux_mean_HI);
-  status = H5Aclose(attribute_id);
+  status       = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Analysis.Flux_mean_HI);
+  status       = H5Aclose(attribute_id);
 
   attribute_id = H5Acreate(group_id, "Flux_mean_HeII", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Analysis.Flux_mean_HeII);
-  status = H5Aclose(attribute_id);
-
-  if ( Analysis.Computed_Flux_Power_Spectrum == 1 ){
-
+  status       = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Analysis.Flux_mean_HeII);
+  status       = H5Aclose(attribute_id);
 
+  if (Analysis.Computed_Flux_Power_Spectrum == 1) {
     hid_t ps_group, dataspace_id_ps;
-    hsize_t   dims1d_ps[1];
-    int n_bins = Analysis.n_hist_edges_x - 1;
-    dims1d_ps[0] = n_bins;
+    hsize_t dims1d_ps[1];
+    int n_bins      = Analysis.n_hist_edges_x - 1;
+    dims1d_ps[0]    = n_bins;
     dataspace_id_ps = H5Screate_simple(1, dims1d_ps, NULL);
 
     ps_group = H5Gcreate(group_id, "power_spectrum", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
 
-    Real *buffer_ps = (Real *) malloc(n_bins*sizeof(Real));
+    Real *buffer_ps = (Real *)malloc(n_bins * sizeof(Real));
 
-    for ( int bin_id=0; bin_id<n_bins; bin_id++ ){
+    for (int bin_id = 0; bin_id < n_bins; bin_id++) {
       buffer_ps[bin_id] = Analysis.k_centers[bin_id];
     }
     dataset_id = H5Dcreate(ps_group, "k_vals", H5T_IEEE_F64BE, dataspace_id_ps, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer_ps);
-    status = H5Dclose(dataset_id);
+    status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer_ps);
+    status     = H5Dclose(dataset_id);
 
-    for ( int bin_id=0; bin_id<n_bins; bin_id++ ){
+    for (int bin_id = 0; bin_id < n_bins; bin_id++) {
       buffer_ps[bin_id] = Analysis.ps_mean[bin_id];
     }
     dataset_id = H5Dcreate(ps_group, "p(k)", H5T_IEEE_F64BE, dataspace_id_ps, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer_ps);
-    status = H5Dclose(dataset_id);
+    status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer_ps);
+    status     = H5Dclose(dataset_id);
 
-    free( buffer_ps );
+    free(buffer_ps);
     status = H5Gclose(ps_group);
-
-
   }
-  
+
   status = H5Gclose(group_id);
 
   #endif
-
-
-
 }
 
-#ifdef COSMOLOGY
-void Analysis_Module::Load_Scale_Outputs( struct parameters *P ) {
-
+  #ifdef COSMOLOGY
+void AnalysisModule::Load_Scale_Outputs(struct Parameters *P)
+{
   char filename_1[100];
   strcpy(filename_1, P->analysis_scale_outputs_file);
-  chprintf( " Loading Analysis Scale_Factor Outpus: %s\n", filename_1);
+  chprintf(" Loading Analysis Scale_Factor Outpus: %s\n", filename_1);
 
-  ifstream file_out ( filename_1 );
+  ifstream file_out(filename_1);
   string line;
   Real a_value, current_a;
-  if (file_out.is_open()){
-    while ( getline (file_out,line) ){
-      a_value = atof( line.c_str() );
-      scale_outputs.push_back( a_value );
+  if (file_out.is_open()) {
+    while (getline(file_out, line)) {
+      a_value = atof(line.c_str());
+      scale_outputs.push_back(a_value);
       n_outputs += 1;
       // chprintf("%f\n", a_value);
     }
     file_out.close();
-    n_outputs = scale_outputs.size();
+    n_outputs        = scale_outputs.size();
     next_output_indx = 0;
     chprintf("  Loaded %d scale outputs \n", n_outputs);
-  }
-  else{
+  } else {
     chprintf("  Error: Unable to open cosmology outputs file\n");
     exit(1);
   }
@@ -689,42 +680,45 @@ void Analysis_Module::Load_Scale_Outputs( struct parameters *P ) {
   chprintf(" Setting next analysis output\n");
 
   int scale_indx = next_output_indx;
-  current_a = 1. / ( 1 + current_z );
-  a_value = scale_outputs[scale_indx];
+  current_a      = 1. / (1 + current_z);
+  a_value        = scale_outputs[scale_indx];
 
-  while ( (current_a - a_value) > 1e-4  ){
+  while ((current_a - a_value) > 1e-4) {
     // chprintf( "%f   %f\n", a_value, current_a);
     scale_indx += 1;
     a_value = scale_outputs[scale_indx];
   }
   next_output_indx = scale_indx;
-  next_output = a_value;
-  chprintf("  Next output scale index: %d  \n", next_output_indx );
+  next_output      = a_value;
+  chprintf("  Next output scale index: %d  \n", next_output_indx);
   chprintf("  Next output scale value: %f  \n", next_output);
 
-  if ( fabs(current_a - next_output) > 1e-4 ) Output_Now = false;
-  else Output_Now = true;
+  if (fabs(current_a - next_output) > 1e-4)
+    Output_Now = false;
+  else
+    Output_Now = true;
 
   n_file = next_output_indx;
-
 }
 
-void Analysis_Module::Set_Next_Scale_Output(  ){
-
+void AnalysisModule::Set_Next_Scale_Output()
+{
   int scale_indx = next_output_indx;
   Real a_value, current_a;
-  current_a = 1. / ( 1 + current_z );
-  a_value = scale_outputs[scale_indx];
-  if  ( ( scale_indx == 0 ) && ( abs(a_value - current_a )<1e-5 ) )scale_indx = 1;
-  else scale_indx += 1;
+  current_a = 1. / (1 + current_z);
+  a_value   = scale_outputs[scale_indx];
+  if ((scale_indx == 0) && (abs(a_value - current_a) < 1e-5))
+    scale_indx = 1;
+  else
+    scale_indx += 1;
   a_value = scale_outputs[scale_indx];
 
   next_output_indx = scale_indx;
-  next_output = a_value;
-  n_file = next_output_indx;
+  next_output      = a_value;
+  n_file           = next_output_indx;
 
   // chprintf("Next Analysis Output: z=%f \n", 1./next_output - 1);
 }
-#endif //COSMOLOGY
+  #endif  // COSMOLOGY
 
 #endif
diff --git a/src/analysis/lya_statistics.cpp b/src/analysis/lya_statistics.cpp
index 35f8ce337..968011bae 100644
--- a/src/analysis/lya_statistics.cpp
+++ b/src/analysis/lya_statistics.cpp
@@ -1,21 +1,22 @@
 #ifdef ANALYSIS
-#ifdef LYA_STATISTICS
+  #ifdef LYA_STATISTICS
 
-#include "../analysis/analysis.h"
-#include "../io/io.h"
-#include <unistd.h>
-#include <complex.h>
+    #include <complex.h>
+    #include <unistd.h>
 
-#ifdef MPI_CHOLLA
-#include "../mpi/mpi_routines.h"
-#endif
+    #include "../analysis/analysis.h"
+    #include "../io/io.h"
+
+    #ifdef MPI_CHOLLA
+      #include "../mpi/mpi_routines.h"
+    #endif
 
 // #define PRINT_ANALYSIS_LOG
 
-void Analysis_Module::Transfer_Skewers_Global_Axis( int axis ){
-  
+void AnalysisModule::Transfer_Skewers_Global_Axis(int axis)
+{
   bool am_I_root;
-  int n_skewers_root, n_los; 
+  int n_skewers_root, n_los;
   bool *root_procs;
   Real *skewers_density_root;
   Real *skewers_density_global;
@@ -32,395 +33,381 @@ void Analysis_Module::Transfer_Skewers_Global_Axis( int axis ){
   Real *skewers_F_HeII_global;
   Real *skewers_F_HeII_root;
   Real *transfer_buffer;
-  
-  
+
   // chprintf( "  Transfering Skewers \n" );
-  
-  if ( axis == 0 ){
-    am_I_root = am_I_root_x;
-    n_los = nx_total;
-    root_procs = root_procs_x;
-    n_skewers_root  = n_skewers_local_x;
-    skewers_density_root   = skewers_density_root_x;
-    skewers_density_global = skewers_density_x_global;
-    skewers_HI_density_root   = skewers_HI_density_root_x;
-    skewers_HI_density_global = skewers_HI_density_x_global;
+
+  if (axis == 0) {
+    am_I_root                   = am_I_root_x;
+    n_los                       = nx_total;
+    root_procs                  = root_procs_x;
+    n_skewers_root              = n_skewers_local_x;
+    skewers_density_root        = skewers_density_root_x;
+    skewers_density_global      = skewers_density_x_global;
+    skewers_HI_density_root     = skewers_HI_density_root_x;
+    skewers_HI_density_global   = skewers_HI_density_x_global;
     skewers_HeII_density_root   = skewers_HeII_density_root_x;
     skewers_HeII_density_global = skewers_HeII_density_x_global;
-    skewers_temperature_root   = skewers_temperature_root_x;
-    skewers_temperature_global = skewers_temperature_x_global;
+    skewers_temperature_root    = skewers_temperature_root_x;
+    skewers_temperature_global  = skewers_temperature_x_global;
     skewers_los_velocity_root   = skewers_velocity_root_x;
     skewers_los_velocity_global = skewers_los_velocity_x_global;
-    skewers_F_HI_global   = skewers_transmitted_flux_HI_x_global;
-    skewers_F_HeII_global = skewers_transmitted_flux_HeII_x_global;
-    skewers_F_HI_root     = skewers_transmitted_flux_HI_x;
-    skewers_F_HeII_root   = skewers_transmitted_flux_HeII_x;
-    transfer_buffer = transfer_buffer_root_x;
-  }
-  if ( axis == 1 ){
-    am_I_root = am_I_root_y;
-    n_los = ny_total;
-    root_procs = root_procs_y;
-    n_skewers_root  = n_skewers_local_y;
-    skewers_density_root   = skewers_density_root_y;
-    skewers_density_global = skewers_density_y_global;
-    skewers_HI_density_root   = skewers_HI_density_root_y;
-    skewers_HI_density_global = skewers_HI_density_y_global;
+    skewers_F_HI_global         = skewers_transmitted_flux_HI_x_global;
+    skewers_F_HeII_global       = skewers_transmitted_flux_HeII_x_global;
+    skewers_F_HI_root           = skewers_transmitted_flux_HI_x;
+    skewers_F_HeII_root         = skewers_transmitted_flux_HeII_x;
+    transfer_buffer             = transfer_buffer_root_x;
+  }
+  if (axis == 1) {
+    am_I_root                   = am_I_root_y;
+    n_los                       = ny_total;
+    root_procs                  = root_procs_y;
+    n_skewers_root              = n_skewers_local_y;
+    skewers_density_root        = skewers_density_root_y;
+    skewers_density_global      = skewers_density_y_global;
+    skewers_HI_density_root     = skewers_HI_density_root_y;
+    skewers_HI_density_global   = skewers_HI_density_y_global;
     skewers_HeII_density_root   = skewers_HeII_density_root_y;
     skewers_HeII_density_global = skewers_HeII_density_y_global;
-    skewers_temperature_root   = skewers_temperature_root_y;
-    skewers_temperature_global = skewers_temperature_y_global;
+    skewers_temperature_root    = skewers_temperature_root_y;
+    skewers_temperature_global  = skewers_temperature_y_global;
     skewers_los_velocity_root   = skewers_velocity_root_y;
     skewers_los_velocity_global = skewers_los_velocity_y_global;
-    skewers_F_HI_global   = skewers_transmitted_flux_HI_y_global;
-    skewers_F_HeII_global = skewers_transmitted_flux_HeII_y_global;
-    skewers_F_HI_root     = skewers_transmitted_flux_HI_y;
-    skewers_F_HeII_root   = skewers_transmitted_flux_HeII_y;
-    transfer_buffer = transfer_buffer_root_y;
-  }
-  if ( axis == 2 ){
-    am_I_root = am_I_root_z;
-    n_los = nz_total;
-    root_procs = root_procs_z;
-    n_skewers_root  = n_skewers_local_z;
-    skewers_density_root   = skewers_density_root_z;
-    skewers_density_global = skewers_density_z_global;
-    skewers_HI_density_root   = skewers_HI_density_root_z;
-    skewers_HI_density_global = skewers_HI_density_z_global;
+    skewers_F_HI_global         = skewers_transmitted_flux_HI_y_global;
+    skewers_F_HeII_global       = skewers_transmitted_flux_HeII_y_global;
+    skewers_F_HI_root           = skewers_transmitted_flux_HI_y;
+    skewers_F_HeII_root         = skewers_transmitted_flux_HeII_y;
+    transfer_buffer             = transfer_buffer_root_y;
+  }
+  if (axis == 2) {
+    am_I_root                   = am_I_root_z;
+    n_los                       = nz_total;
+    root_procs                  = root_procs_z;
+    n_skewers_root              = n_skewers_local_z;
+    skewers_density_root        = skewers_density_root_z;
+    skewers_density_global      = skewers_density_z_global;
+    skewers_HI_density_root     = skewers_HI_density_root_z;
+    skewers_HI_density_global   = skewers_HI_density_z_global;
     skewers_HeII_density_root   = skewers_HeII_density_root_z;
     skewers_HeII_density_global = skewers_HeII_density_z_global;
-    skewers_temperature_root   = skewers_temperature_root_z;
-    skewers_temperature_global = skewers_temperature_z_global;
+    skewers_temperature_root    = skewers_temperature_root_z;
+    skewers_temperature_global  = skewers_temperature_z_global;
     skewers_los_velocity_root   = skewers_velocity_root_z;
     skewers_los_velocity_global = skewers_los_velocity_z_global;
-    skewers_F_HI_global   = skewers_transmitted_flux_HI_z_global;
-    skewers_F_HeII_global = skewers_transmitted_flux_HeII_z_global;
-    skewers_F_HI_root     = skewers_transmitted_flux_HI_z;
-    skewers_F_HeII_root   = skewers_transmitted_flux_HeII_z;
-    transfer_buffer = transfer_buffer_root_z;
+    skewers_F_HI_global         = skewers_transmitted_flux_HI_z_global;
+    skewers_F_HeII_global       = skewers_transmitted_flux_HeII_z_global;
+    skewers_F_HI_root           = skewers_transmitted_flux_HI_z;
+    skewers_F_HeII_root         = skewers_transmitted_flux_HeII_z;
+    transfer_buffer             = transfer_buffer_root_z;
   }
-  
-  if ( !am_I_root ) return;
+
+  if (!am_I_root) return;
 
   MPI_Status mpi_status;
   int n_added, offset;
 
-  #ifdef OUTPUT_SKEWERS
+    #ifdef OUTPUT_SKEWERS
   // Set the density array
-  if ( procID == 0){
+  if (procID == 0) {
     // Write the local data into the global array
-    for ( int skewer_id=0; skewer_id<n_skewers_root; skewer_id++){
-      for ( int los_id=0; los_id<n_los; los_id++){
-        skewers_density_global[skewer_id*n_los + los_id] = skewers_density_root[skewer_id*n_los + los_id];
+    for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) {
+      for (int los_id = 0; los_id < n_los; los_id++) {
+        skewers_density_global[skewer_id * n_los + los_id] = skewers_density_root[skewer_id * n_los + los_id];
       }
     }
     // Write the remote data into the global array
     n_added = 1;
-    for ( int p_id=1; p_id<nproc; p_id++ ){
-      if ( !root_procs[p_id] ) continue;
-      MPI_Recv( transfer_buffer, n_skewers_root*n_los, MPI_CHREAL, p_id, 0, world, &mpi_status  );  
+    for (int p_id = 1; p_id < nproc; p_id++) {
+      if (!root_procs[p_id]) continue;
+      MPI_Recv(transfer_buffer, n_skewers_root * n_los, MPI_CHREAL, p_id, 0, world, &mpi_status);
       offset = n_added * n_skewers_root * n_los;
-      for ( int skewer_id=0; skewer_id<n_skewers_root; skewer_id++){
-        for ( int los_id=0; los_id<n_los; los_id++){
-          skewers_density_global[offset + skewer_id*n_los + los_id] = transfer_buffer[skewer_id*n_los + los_id];
+      for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) {
+        for (int los_id = 0; los_id < n_los; los_id++) {
+          skewers_density_global[offset + skewer_id * n_los + los_id] = transfer_buffer[skewer_id * n_los + los_id];
         }
       }
       n_added += 1;
-    }  
-  }
-  else{
-    MPI_Send( skewers_density_root, n_skewers_root*n_los, MPI_CHREAL, 0, 0, world  );
+    }
+  } else {
+    MPI_Send(skewers_density_root, n_skewers_root * n_los, MPI_CHREAL, 0, 0, world);
   }
-  #endif
+    #endif
 
   // Set the HI density array
-  if ( procID == 0){
+  if (procID == 0) {
     // Write the local data into the global array
-    for ( int skewer_id=0; skewer_id<n_skewers_root; skewer_id++){
-      for ( int los_id=0; los_id<n_los; los_id++){
-        skewers_HI_density_global[skewer_id*n_los + los_id] = skewers_HI_density_root[skewer_id*n_los + los_id];
+    for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) {
+      for (int los_id = 0; los_id < n_los; los_id++) {
+        skewers_HI_density_global[skewer_id * n_los + los_id] = skewers_HI_density_root[skewer_id * n_los + los_id];
       }
     }
     // Write the remote data into the global array
     n_added = 1;
-    for ( int p_id=1; p_id<nproc; p_id++ ){
-      if ( !root_procs[p_id] ) continue;
-      MPI_Recv( transfer_buffer, n_skewers_root*n_los, MPI_CHREAL, p_id, 0, world, &mpi_status  );  
+    for (int p_id = 1; p_id < nproc; p_id++) {
+      if (!root_procs[p_id]) continue;
+      MPI_Recv(transfer_buffer, n_skewers_root * n_los, MPI_CHREAL, p_id, 0, world, &mpi_status);
       offset = n_added * n_skewers_root * n_los;
-      for ( int skewer_id=0; skewer_id<n_skewers_root; skewer_id++){
-        for ( int los_id=0; los_id<n_los; los_id++){
-          skewers_HI_density_global[offset + skewer_id*n_los + los_id] = transfer_buffer[skewer_id*n_los + los_id];
+      for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) {
+        for (int los_id = 0; los_id < n_los; los_id++) {
+          skewers_HI_density_global[offset + skewer_id * n_los + los_id] = transfer_buffer[skewer_id * n_los + los_id];
         }
       }
       n_added += 1;
-    }  
-  }
-  else{
-    MPI_Send( skewers_HI_density_root, n_skewers_root*n_los, MPI_CHREAL, 0, 0, world  );
+    }
+  } else {
+    MPI_Send(skewers_HI_density_root, n_skewers_root * n_los, MPI_CHREAL, 0, 0, world);
   }
 
   // Set the HeII density array
-  if ( procID == 0){
+  if (procID == 0) {
     // Write the local data into the global array
-    for ( int skewer_id=0; skewer_id<n_skewers_root; skewer_id++){
-      for ( int los_id=0; los_id<n_los; los_id++){
-        skewers_HeII_density_global[skewer_id*n_los + los_id] = skewers_HeII_density_root[skewer_id*n_los + los_id];
+    for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) {
+      for (int los_id = 0; los_id < n_los; los_id++) {
+        skewers_HeII_density_global[skewer_id * n_los + los_id] = skewers_HeII_density_root[skewer_id * n_los + los_id];
       }
     }
     // Write the remote data into the global array
     n_added = 1;
-    for ( int p_id=1; p_id<nproc; p_id++ ){
-      if ( !root_procs[p_id] ) continue;
-      MPI_Recv( transfer_buffer, n_skewers_root*n_los, MPI_CHREAL, p_id, 0, world, &mpi_status  );  
+    for (int p_id = 1; p_id < nproc; p_id++) {
+      if (!root_procs[p_id]) continue;
+      MPI_Recv(transfer_buffer, n_skewers_root * n_los, MPI_CHREAL, p_id, 0, world, &mpi_status);
       offset = n_added * n_skewers_root * n_los;
-      for ( int skewer_id=0; skewer_id<n_skewers_root; skewer_id++){
-        for ( int los_id=0; los_id<n_los; los_id++){
-          skewers_HeII_density_global[offset + skewer_id*n_los + los_id] = transfer_buffer[skewer_id*n_los + los_id];
+      for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) {
+        for (int los_id = 0; los_id < n_los; los_id++) {
+          skewers_HeII_density_global[offset + skewer_id * n_los + los_id] =
+              transfer_buffer[skewer_id * n_los + los_id];
         }
       }
       n_added += 1;
-    }  
-  }
-  else{
-    MPI_Send( skewers_HeII_density_root, n_skewers_root*n_los, MPI_CHREAL, 0, 0, world  );
+    }
+  } else {
+    MPI_Send(skewers_HeII_density_root, n_skewers_root * n_los, MPI_CHREAL, 0, 0, world);
   }
-  
+
   // Set the temeprature array
-  if ( procID == 0){
+  if (procID == 0) {
     // Write the local data into the global array
-    for ( int skewer_id=0; skewer_id<n_skewers_root; skewer_id++){
-      for ( int los_id=0; los_id<n_los; los_id++){
-        skewers_temperature_global[skewer_id*n_los + los_id] = skewers_temperature_root[skewer_id*n_los + los_id];
+    for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) {
+      for (int los_id = 0; los_id < n_los; los_id++) {
+        skewers_temperature_global[skewer_id * n_los + los_id] = skewers_temperature_root[skewer_id * n_los + los_id];
       }
     }
     // Write the remote data into the global array
     n_added = 1;
-    for ( int p_id=1; p_id<nproc; p_id++ ){
-      if ( !root_procs[p_id] ) continue;
-      MPI_Recv( transfer_buffer, n_skewers_root*n_los, MPI_CHREAL, p_id, 0, world, &mpi_status  );  
+    for (int p_id = 1; p_id < nproc; p_id++) {
+      if (!root_procs[p_id]) continue;
+      MPI_Recv(transfer_buffer, n_skewers_root * n_los, MPI_CHREAL, p_id, 0, world, &mpi_status);
       offset = n_added * n_skewers_root * n_los;
-      for ( int skewer_id=0; skewer_id<n_skewers_root; skewer_id++){
-        for ( int los_id=0; los_id<n_los; los_id++){
-          skewers_temperature_global[offset + skewer_id*n_los + los_id] = transfer_buffer[skewer_id*n_los + los_id];
+      for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) {
+        for (int los_id = 0; los_id < n_los; los_id++) {
+          skewers_temperature_global[offset + skewer_id * n_los + los_id] = transfer_buffer[skewer_id * n_los + los_id];
         }
       }
       n_added += 1;
-    }  
-  }
-  else{
-    MPI_Send( skewers_temperature_root, n_skewers_root*n_los, MPI_CHREAL, 0, 0, world  );
+    }
+  } else {
+    MPI_Send(skewers_temperature_root, n_skewers_root * n_los, MPI_CHREAL, 0, 0, world);
   }
-  
+
   // Set the los_velocity array
-  if ( procID == 0){
+  if (procID == 0) {
     // Write the local data into the global array
-    for ( int skewer_id=0; skewer_id<n_skewers_root; skewer_id++){
-      for ( int los_id=0; los_id<n_los; los_id++){
-        skewers_los_velocity_global[skewer_id*n_los + los_id] = skewers_los_velocity_root[skewer_id*n_los + los_id];
+    for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) {
+      for (int los_id = 0; los_id < n_los; los_id++) {
+        skewers_los_velocity_global[skewer_id * n_los + los_id] = skewers_los_velocity_root[skewer_id * n_los + los_id];
       }
     }
     // Write the remote data into the global array
     n_added = 1;
-    for ( int p_id=1; p_id<nproc; p_id++ ){
-      if ( !root_procs[p_id] ) continue;
-      MPI_Recv( transfer_buffer, n_skewers_root*n_los, MPI_CHREAL, p_id, 0, world, &mpi_status  );  
+    for (int p_id = 1; p_id < nproc; p_id++) {
+      if (!root_procs[p_id]) continue;
+      MPI_Recv(transfer_buffer, n_skewers_root * n_los, MPI_CHREAL, p_id, 0, world, &mpi_status);
       offset = n_added * n_skewers_root * n_los;
-      for ( int skewer_id=0; skewer_id<n_skewers_root; skewer_id++){
-        for ( int los_id=0; los_id<n_los; los_id++){
-          skewers_los_velocity_global[offset + skewer_id*n_los + los_id] = transfer_buffer[skewer_id*n_los + los_id];
+      for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) {
+        for (int los_id = 0; los_id < n_los; los_id++) {
+          skewers_los_velocity_global[offset + skewer_id * n_los + los_id] =
+              transfer_buffer[skewer_id * n_los + los_id];
         }
       }
       n_added += 1;
-    }  
+    }
+  } else {
+    MPI_Send(skewers_los_velocity_root, n_skewers_root * n_los, MPI_CHREAL, 0, 0, world);
   }
-  else{
-    MPI_Send( skewers_los_velocity_root, n_skewers_root*n_los, MPI_CHREAL, 0, 0, world  );
-  }  
-  
+
   // Set the HI Flux array
-  if ( procID == 0){
+  if (procID == 0) {
     // Write the local data into the global array
-    for ( int skewer_id=0; skewer_id<n_skewers_root; skewer_id++){
-      for ( int los_id=0; los_id<n_los; los_id++){
-        skewers_F_HI_global[skewer_id*n_los + los_id] = skewers_F_HI_root[skewer_id*n_los + los_id];
+    for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) {
+      for (int los_id = 0; los_id < n_los; los_id++) {
+        skewers_F_HI_global[skewer_id * n_los + los_id] = skewers_F_HI_root[skewer_id * n_los + los_id];
       }
     }
     // Write the remote data into the global array
     n_added = 1;
-    for ( int p_id=1; p_id<nproc; p_id++ ){
-      if ( !root_procs[p_id] ) continue;
-      MPI_Recv( transfer_buffer, n_skewers_root*n_los, MPI_CHREAL, p_id, 0, world, &mpi_status  );  
+    for (int p_id = 1; p_id < nproc; p_id++) {
+      if (!root_procs[p_id]) continue;
+      MPI_Recv(transfer_buffer, n_skewers_root * n_los, MPI_CHREAL, p_id, 0, world, &mpi_status);
       offset = n_added * n_skewers_root * n_los;
-      for ( int skewer_id=0; skewer_id<n_skewers_root; skewer_id++){
-        for ( int los_id=0; los_id<n_los; los_id++){
-          skewers_F_HI_global[offset + skewer_id*n_los + los_id] = transfer_buffer[skewer_id*n_los + los_id];
+      for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) {
+        for (int los_id = 0; los_id < n_los; los_id++) {
+          skewers_F_HI_global[offset + skewer_id * n_los + los_id] = transfer_buffer[skewer_id * n_los + los_id];
         }
       }
       n_added += 1;
-    }  
-  }
-  else{
-    MPI_Send( skewers_F_HI_root, n_skewers_root*n_los, MPI_CHREAL, 0, 0, world  );
+    }
+  } else {
+    MPI_Send(skewers_F_HI_root, n_skewers_root * n_los, MPI_CHREAL, 0, 0, world);
   }
-  
-  
+
   // Set the HeII Flux array
-  if ( procID == 0){
+  if (procID == 0) {
     // Write the local data into the global array
-    for ( int skewer_id=0; skewer_id<n_skewers_root; skewer_id++){
-      for ( int los_id=0; los_id<n_los; los_id++){
-        skewers_F_HeII_global[skewer_id*n_los + los_id] = skewers_F_HeII_root[skewer_id*n_los + los_id];
+    for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) {
+      for (int los_id = 0; los_id < n_los; los_id++) {
+        skewers_F_HeII_global[skewer_id * n_los + los_id] = skewers_F_HeII_root[skewer_id * n_los + los_id];
       }
     }
     // Write the remote data into the global array
     n_added = 1;
-    for ( int p_id=1; p_id<nproc; p_id++ ){
-      if ( !root_procs[p_id] ) continue;
-      MPI_Recv( transfer_buffer, n_skewers_root*n_los, MPI_CHREAL, p_id, 0, world, &mpi_status  );  
+    for (int p_id = 1; p_id < nproc; p_id++) {
+      if (!root_procs[p_id]) continue;
+      MPI_Recv(transfer_buffer, n_skewers_root * n_los, MPI_CHREAL, p_id, 0, world, &mpi_status);
       offset = n_added * n_skewers_root * n_los;
-      for ( int skewer_id=0; skewer_id<n_skewers_root; skewer_id++){
-        for ( int los_id=0; los_id<n_los; los_id++){
-          skewers_F_HeII_global[offset + skewer_id*n_los + los_id] = transfer_buffer[skewer_id*n_los + los_id];
+      for (int skewer_id = 0; skewer_id < n_skewers_root; skewer_id++) {
+        for (int los_id = 0; los_id < n_los; los_id++) {
+          skewers_F_HeII_global[offset + skewer_id * n_los + los_id] = transfer_buffer[skewer_id * n_los + los_id];
         }
       }
       n_added += 1;
-    }  
-  }
-  else{
-    MPI_Send( skewers_F_HeII_root, n_skewers_root*n_los, MPI_CHREAL, 0, 0, world  );
+    }
+  } else {
+    MPI_Send(skewers_F_HeII_root, n_skewers_root * n_los, MPI_CHREAL, 0, 0, world);
   }
-  
-  
-  
-  
 }
 
-int Locate_Index( Real val, Real *values, int N ){
+int Locate_Index(Real val, Real *values, int N)
+{
   // Find the index such that  values[index] < val < values[index+1]
   // Values has to be sorted
-  if ( val < values[0] )   return -2;
-  if ( val > values[N-1] ) return -1;
+  if (val < values[0]) return -2;
+  if (val > values[N - 1]) return -1;
 
   int index = 0;
-  while ( index < N ){
-    if ( val < values[index] ) break;
+  while (index < N) {
+    if (val < values[index]) break;
     index += 1;
   }
 
-  if ( val < values[index-1] ){
-    chprintf( "ERROR; Value less than left edge:  val=%f    left=%f \n", val, values[index-1] );
+  if (val < values[index - 1]) {
+    chprintf("ERROR; Value less than left edge:  val=%f    left=%f \n", val, values[index - 1]);
     exit(-1);
   }
-  if ( val > values[index] ){
-    chprintf( "ERROR; Value grater than right edge:  val=%f    right=%f \n", val, values[index] );
+  if (val > values[index]) {
+    chprintf("ERROR; Value grater than right edge:  val=%f    right=%f \n", val, values[index]);
     exit(-1);
   }
 
-  // chprintf( " %d:    %e   %e   %e \n ", index, values[index-1], val, values[index]);
-  return index-1;
-
+  // chprintf( " %d:    %e   %e   %e \n ", index, values[index-1], val,
+  // values[index]);
+  return index - 1;
 }
 
-void Analysis_Module::Clear_Power_Spectrum_Measurements( void ){
-
-  MPI_Barrier( world );
+void AnalysisModule::Clear_Power_Spectrum_Measurements(void)
+{
+  MPI_Barrier(world);
 
   // chprintf( "Cleared Power Spectrum cache \n ");
-  free( hist_k_edges_x );
-  free( hist_PS_x );
-  free( hist_n_x );
-  free( ps_root_x );
-  free( ps_global_x );
-
-  free( hist_k_edges_y );
-  free( hist_PS_y );
-  free( hist_n_y );
-  free( ps_root_y );
-  free( ps_global_y );
-
-  free( hist_k_edges_z );
-  free( hist_PS_z );
-  free( hist_n_z );
-  free( ps_root_z );
-  free( ps_global_z );
-  
-  free( k_centers );
-  free( ps_mean );
-
+  free(hist_k_edges_x);
+  free(hist_PS_x);
+  free(hist_n_x);
+  free(ps_root_x);
+  free(ps_global_x);
+
+  free(hist_k_edges_y);
+  free(hist_PS_y);
+  free(hist_n_y);
+  free(ps_root_y);
+  free(ps_global_y);
+
+  free(hist_k_edges_z);
+  free(hist_PS_z);
+  free(hist_n_z);
+  free(ps_root_z);
+  free(ps_global_z);
+
+  free(k_centers);
+  free(ps_mean);
 }
 
-void Grid3D::Initialize_Power_Spectrum_Measurements( int axis ){
-
+void Grid3D::Initialize_Power_Spectrum_Measurements(int axis)
+{
   int n_los, n_fft;
   Real Lbox, delta_x;
   Real *k_vals;
 
-  if ( axis == 0 ){
+  if (axis == 0) {
     Analysis.n_PS_processed_x = 0;
-    n_los               = Analysis.nx_total;
-    n_fft               = Analysis.n_fft_x;
-    Lbox                = Analysis.Lbox_x;
-    delta_x             = Analysis.dx;
-    k_vals              = Analysis.k_vals_x;
+    n_los                     = Analysis.nx_total;
+    n_fft                     = Analysis.n_fft_x;
+    Lbox                      = Analysis.Lbox_x;
+    delta_x                   = Analysis.dx;
+    k_vals                    = Analysis.k_vals_x;
   }
 
-  if ( axis == 1 ){
+  if (axis == 1) {
     Analysis.n_PS_processed_y = 0;
-    n_los               = Analysis.ny_total;
-    n_fft               = Analysis.n_fft_y;
-    Lbox                = Analysis.Lbox_y;
-    delta_x             = Analysis.dy;
-    k_vals              = Analysis.k_vals_y;
+    n_los                     = Analysis.ny_total;
+    n_fft                     = Analysis.n_fft_y;
+    Lbox                      = Analysis.Lbox_y;
+    delta_x                   = Analysis.dy;
+    k_vals                    = Analysis.k_vals_y;
   }
 
-  if ( axis == 2 ){
+  if (axis == 2) {
     Analysis.n_PS_processed_z = 0;
-    n_los               = Analysis.nz_total;
-    n_fft               = Analysis.n_fft_z;
-    Lbox                = Analysis.Lbox_z;
-    delta_x             = Analysis.dz;
-    k_vals              = Analysis.k_vals_z;
+    n_los                     = Analysis.nz_total;
+    n_fft                     = Analysis.n_fft_z;
+    Lbox                      = Analysis.Lbox_z;
+    delta_x                   = Analysis.dz;
+    k_vals                    = Analysis.k_vals_z;
   }
 
-
   // Get Cosmological variables
   Real H, current_a, L_proper, dx_proper, dv_Hubble;
   current_a = Cosmo.current_a;
-  L_proper = Lbox * current_a / Cosmo.cosmo_h;
+  L_proper  = Lbox * current_a / Cosmo.cosmo_h;
   dx_proper = delta_x * current_a / Cosmo.cosmo_h;
-  H = Cosmo.Get_Hubble_Parameter( current_a );
-  dv_Hubble = H * dx_proper; // km/s
-
+  H         = Cosmo.Get_Hubble_Parameter(current_a);
+  dv_Hubble = H * dx_proper;  // km/s
 
   // Compute the K values
-  for ( int i=0; i<n_fft; i++ ){
-    k_vals[i] = 2 * M_PI * i / ( n_los * dv_Hubble );
+  for (int i = 0; i < n_fft; i++) {
+    k_vals[i] = 2 * M_PI * i / (n_los * dv_Hubble);
     // if ( axis == 0 ) chprintf( "k: %f \n", k_vals[i]  );
   }
 
-  Real k_val, k_min,  k_max, d_log_k, k_start;
+  Real k_val, k_min, k_max, d_log_k, k_start;
   d_log_k = Analysis.d_log_k;
-  k_min = log10( k_vals[1] );
-  k_max = log10( k_vals[n_fft-1] );
-  k_start = log10( 0.99 * k_vals[1] );
+  k_min   = log10(k_vals[1]);
+  k_max   = log10(k_vals[n_fft - 1]);
+  k_start = log10(0.99 * k_vals[1]);
 
-
-  if ( d_log_k == 0 ){
-    chprintf( "ERROR: d_log_k = 0    Set  lya_Pk_d_log_k in the parameter file \n"  );
+  if (d_log_k == 0) {
+    chprintf("ERROR: d_log_k = 0    Set  lya_Pk_d_log_k in the parameter file \n");
     exit(-1);
   }
 
   // if ( axis == 0 ) chprintf( "dv_Hubble: %f \n", dv_Hubble  );
   // if ( axis == 0 ) chprintf( "k min : %f \n", k_min  );
   // if ( axis == 0 ) chprintf( "k max : %f \n", k_max  );
-  
-  k_val = k_start;
+
+  k_val            = k_start;
   int n_hist_edges = 1;
-  while ( k_val < k_max ){
+  while (k_val < k_max) {
     n_hist_edges += 1;
-    k_val += d_log_k ;
+    k_val += d_log_k;
   }
-    
+
   Real *hist_k_edges;
   Real *hist_PS;
   Real *hist_n;
@@ -429,79 +416,74 @@ void Grid3D::Initialize_Power_Spectrum_Measurements( int axis ){
 
   int n_bins = n_hist_edges - 1;
   // chprintf( " n bins : %d \n", n_bins  );
-  hist_k_edges = (Real *) malloc(n_hist_edges*sizeof(Real));
-  hist_PS     = (Real *) malloc(n_bins*sizeof(Real));
-  hist_n      = (Real *) malloc(n_bins*sizeof(Real));
-  ps_root     = (Real *) malloc(n_bins*sizeof(Real));
-  ps_global   = (Real *) malloc(n_bins*sizeof(Real));
+  hist_k_edges = (Real *)malloc(n_hist_edges * sizeof(Real));
+  hist_PS      = (Real *)malloc(n_bins * sizeof(Real));
+  hist_n       = (Real *)malloc(n_bins * sizeof(Real));
+  ps_root      = (Real *)malloc(n_bins * sizeof(Real));
+  ps_global    = (Real *)malloc(n_bins * sizeof(Real));
 
   k_val = k_start;
-  for ( int bin_id=0; bin_id<n_hist_edges; bin_id++ ){
-    hist_k_edges[bin_id] = pow( 10, k_val );
+  for (int bin_id = 0; bin_id < n_hist_edges; bin_id++) {
+    hist_k_edges[bin_id] = pow(10, k_val);
     k_val += d_log_k;
   }
 
-  for ( int bin_id=0; bin_id<n_bins; bin_id++ ){
-    ps_root[bin_id] = 0;
+  for (int bin_id = 0; bin_id < n_bins; bin_id++) {
+    ps_root[bin_id]   = 0;
     ps_global[bin_id] = 0;
   }
 
-  if ( axis == 0 ){
+  if (axis == 0) {
     Analysis.n_hist_edges_x = n_hist_edges;
     Analysis.hist_k_edges_x = hist_k_edges;
-    Analysis.hist_PS_x     = hist_PS;
-    Analysis.hist_n_x      = hist_n;
-    Analysis.ps_root_x     = ps_root;
-    Analysis.ps_global_x   = ps_global;
+    Analysis.hist_PS_x      = hist_PS;
+    Analysis.hist_n_x       = hist_n;
+    Analysis.ps_root_x      = ps_root;
+    Analysis.ps_global_x    = ps_global;
   }
 
-  if ( axis == 1 ){
+  if (axis == 1) {
     Analysis.n_hist_edges_y = n_hist_edges;
     Analysis.hist_k_edges_y = hist_k_edges;
-    Analysis.hist_PS_y     = hist_PS;
-    Analysis.hist_n_y      = hist_n;
-    Analysis.ps_root_y     = ps_root;
-    Analysis.ps_global_y   = ps_global;
+    Analysis.hist_PS_y      = hist_PS;
+    Analysis.hist_n_y       = hist_n;
+    Analysis.ps_root_y      = ps_root;
+    Analysis.ps_global_y    = ps_global;
   }
 
-  if ( axis == 2 ){
+  if (axis == 2) {
     Analysis.n_hist_edges_z = n_hist_edges;
     Analysis.hist_k_edges_z = hist_k_edges;
-    Analysis.hist_PS_z     = hist_PS;
-    Analysis.hist_n_z      = hist_n;
-    Analysis.ps_root_z     = ps_root;
-    Analysis.ps_global_z   = ps_global;
+    Analysis.hist_PS_z      = hist_PS;
+    Analysis.hist_n_z       = hist_n;
+    Analysis.ps_root_z      = ps_root;
+    Analysis.ps_global_z    = ps_global;
   }
 
-
   // Create array  for global PS
-  if ( axis == 2 ){
-    if ( Analysis.n_hist_edges_x != Analysis.n_hist_edges_y || Analysis.n_hist_edges_x != Analysis.n_hist_edges_z ){
-      chprintf( "ERROR: PS Histogram sizes dont match \n");
+  if (axis == 2) {
+    if (Analysis.n_hist_edges_x != Analysis.n_hist_edges_y || Analysis.n_hist_edges_x != Analysis.n_hist_edges_z) {
+      chprintf("ERROR: PS Histogram sizes dont match \n");
       exit(-1);
-    }
-    else{
-      Analysis.ps_mean  = (Real *) malloc(n_bins*sizeof(Real));
-      Analysis.k_centers = (Real *) malloc(n_bins*sizeof(Real));
-      
-      for (int bin_id=0; bin_id<n_bins; bin_id++ ){
-        Analysis.k_centers[bin_id] = sqrt( Analysis.hist_k_edges_x[bin_id] * Analysis.hist_k_edges_x[bin_id+1]  );
+    } else {
+      Analysis.ps_mean   = (Real *)malloc(n_bins * sizeof(Real));
+      Analysis.k_centers = (Real *)malloc(n_bins * sizeof(Real));
+
+      for (int bin_id = 0; bin_id < n_bins; bin_id++) {
+        Analysis.k_centers[bin_id] = sqrt(Analysis.hist_k_edges_x[bin_id] * Analysis.hist_k_edges_x[bin_id + 1]);
       }
     }
-
   }
 
   // if ( axis == 0 ){
   //   for ( int bin_id=0; bin_id<n_hist_edges; bin_id++ ){
   //     chprintf( "%f \n", hist_k_edges[bin_id]);
   //   }
-  // }  
+  // }
 }
 
-
-
-void Grid3D::Compute_Flux_Power_Spectrum_Skewer( int skewer_id, int axis ){
-
+void Grid3D::Compute_Flux_Power_Spectrum_Skewer(int skewer_id, int axis)
+{
   bool am_I_root;
   int n_los, n_fft, n_hist_edges;
   Real Lbox, delta_x;
@@ -515,149 +497,144 @@ void Grid3D::Compute_Flux_Power_Spectrum_Skewer( int skewer_id, int axis ){
   Real *hist_n;
   Real *ps_root;
   Real *skewers_transmitted_flux;
-  
-  if ( axis == 0 ){
-    am_I_root           = Analysis.am_I_root_x;
-    n_los               = Analysis.nx_total;
-    n_fft               = Analysis.n_fft_x;
-    n_hist_edges        = Analysis.n_hist_edges_x;
-    Lbox                = Analysis.Lbox_x;
-    delta_x             = Analysis.dx;
-    delta_F             = Analysis.delta_F_x;
-    fft_delta_F         = Analysis.fft_delta_F_x;
-    fft2_delta_F        = Analysis.fft2_delta_F_x;
-    k_vals              = Analysis.k_vals_x;
-    fftw_plan           = Analysis.fftw_plan_x;
-    hist_k_edges        = Analysis.hist_k_edges_x;
-    hist_PS             = Analysis.hist_PS_x;
-    hist_n              = Analysis.hist_n_x;
-    ps_root             = Analysis.ps_root_x;
-    skewers_transmitted_flux    = Analysis.skewers_transmitted_flux_HI_x;
-  }
-
-  if ( axis == 1 ){
-    am_I_root           = Analysis.am_I_root_y;
-    n_los               = Analysis.ny_total;
-    n_fft               = Analysis.n_fft_y;
-    n_hist_edges        = Analysis.n_hist_edges_y;
-    Lbox                = Analysis.Lbox_y;
-    delta_x             = Analysis.dy;
-    delta_F             = Analysis.delta_F_y;
-    fft_delta_F         = Analysis.fft_delta_F_y;
-    fft2_delta_F        = Analysis.fft2_delta_F_y;
-    k_vals              = Analysis.k_vals_y;
-    fftw_plan           = Analysis.fftw_plan_y;
-    hist_k_edges        = Analysis.hist_k_edges_y;
-    hist_PS             = Analysis.hist_PS_y;
-    hist_n              = Analysis.hist_n_y;
-    ps_root             = Analysis.ps_root_y;
-    skewers_transmitted_flux    = Analysis.skewers_transmitted_flux_HI_y;
-  }
-
-  if ( axis == 2 ){
-    am_I_root           = Analysis.am_I_root_z;
-    n_los               = Analysis.nz_total;
-    n_fft               = Analysis.n_fft_z;
-    n_hist_edges        = Analysis.n_hist_edges_z;
-    Lbox                = Analysis.Lbox_z;
-    delta_x             = Analysis.dz;
-    delta_F             = Analysis.delta_F_z;
-    fft_delta_F         = Analysis.fft_delta_F_z;
-    fft2_delta_F        = Analysis.fft2_delta_F_z;
-    k_vals              = Analysis.k_vals_z;
-    fftw_plan           = Analysis.fftw_plan_z;
-    hist_k_edges        = Analysis.hist_k_edges_z;
-    hist_PS             = Analysis.hist_PS_z;
-    hist_n              = Analysis.hist_n_z;
-    ps_root             = Analysis.ps_root_z;
-    skewers_transmitted_flux    = Analysis.skewers_transmitted_flux_HI_z;
+
+  if (axis == 0) {
+    am_I_root                = Analysis.am_I_root_x;
+    n_los                    = Analysis.nx_total;
+    n_fft                    = Analysis.n_fft_x;
+    n_hist_edges             = Analysis.n_hist_edges_x;
+    Lbox                     = Analysis.Lbox_x;
+    delta_x                  = Analysis.dx;
+    delta_F                  = Analysis.delta_F_x;
+    fft_delta_F              = Analysis.fft_delta_F_x;
+    fft2_delta_F             = Analysis.fft2_delta_F_x;
+    k_vals                   = Analysis.k_vals_x;
+    fftw_plan                = Analysis.fftw_plan_x;
+    hist_k_edges             = Analysis.hist_k_edges_x;
+    hist_PS                  = Analysis.hist_PS_x;
+    hist_n                   = Analysis.hist_n_x;
+    ps_root                  = Analysis.ps_root_x;
+    skewers_transmitted_flux = Analysis.skewers_transmitted_flux_HI_x;
+  }
+
+  if (axis == 1) {
+    am_I_root                = Analysis.am_I_root_y;
+    n_los                    = Analysis.ny_total;
+    n_fft                    = Analysis.n_fft_y;
+    n_hist_edges             = Analysis.n_hist_edges_y;
+    Lbox                     = Analysis.Lbox_y;
+    delta_x                  = Analysis.dy;
+    delta_F                  = Analysis.delta_F_y;
+    fft_delta_F              = Analysis.fft_delta_F_y;
+    fft2_delta_F             = Analysis.fft2_delta_F_y;
+    k_vals                   = Analysis.k_vals_y;
+    fftw_plan                = Analysis.fftw_plan_y;
+    hist_k_edges             = Analysis.hist_k_edges_y;
+    hist_PS                  = Analysis.hist_PS_y;
+    hist_n                   = Analysis.hist_n_y;
+    ps_root                  = Analysis.ps_root_y;
+    skewers_transmitted_flux = Analysis.skewers_transmitted_flux_HI_y;
+  }
+
+  if (axis == 2) {
+    am_I_root                = Analysis.am_I_root_z;
+    n_los                    = Analysis.nz_total;
+    n_fft                    = Analysis.n_fft_z;
+    n_hist_edges             = Analysis.n_hist_edges_z;
+    Lbox                     = Analysis.Lbox_z;
+    delta_x                  = Analysis.dz;
+    delta_F                  = Analysis.delta_F_z;
+    fft_delta_F              = Analysis.fft_delta_F_z;
+    fft2_delta_F             = Analysis.fft2_delta_F_z;
+    k_vals                   = Analysis.k_vals_z;
+    fftw_plan                = Analysis.fftw_plan_z;
+    hist_k_edges             = Analysis.hist_k_edges_z;
+    hist_PS                  = Analysis.hist_PS_z;
+    hist_n                   = Analysis.hist_n_z;
+    ps_root                  = Analysis.ps_root_z;
+    skewers_transmitted_flux = Analysis.skewers_transmitted_flux_HI_z;
   }
 
   int n_bins = n_hist_edges - 1;
 
-  if ( !am_I_root ){
-    for ( int i=0; i<n_bins; i++ ){
+  if (!am_I_root) {
+    for (int i = 0; i < n_bins; i++) {
       ps_root[i] = 0;
     }
     return;
-  }  
-  
-  
-  for ( int los_id=0; los_id<n_los; los_id++ ){
-    delta_F[los_id] = skewers_transmitted_flux[ skewer_id * n_los + los_id] / Analysis.Flux_mean_HI;
+  }
+
+  for (int los_id = 0; los_id < n_los; los_id++) {
+    delta_F[los_id] = skewers_transmitted_flux[skewer_id * n_los + los_id] / Analysis.Flux_mean_HI;
   }
 
   // Compute the r2c FFT
-  fftw_execute( fftw_plan );
-  
+  fftw_execute(fftw_plan);
+
   // Get Cosmological variables
   Real H, current_a, L_proper, dx_proper, dv_Hubble;
   current_a = Cosmo.current_a;
-  L_proper = Lbox * current_a / Cosmo.cosmo_h;
+  L_proper  = Lbox * current_a / Cosmo.cosmo_h;
   dx_proper = delta_x * current_a / Cosmo.cosmo_h;
-  H = Cosmo.Get_Hubble_Parameter( current_a );
-  dv_Hubble = H * dx_proper; // km/s
+  H         = Cosmo.Get_Hubble_Parameter(current_a);
+  dv_Hubble = H * dx_proper;  // km/s
 
   // Compute the amplitude of the FFT
-  for ( int i=0; i<n_fft; i++ ){
-    fft2_delta_F[i] = (fft_delta_F[i][0]*fft_delta_F[i][0] + fft_delta_F[i][1]*fft_delta_F[i][1]) / n_los / n_los;
+  for (int i = 0; i < n_fft; i++) {
+    fft2_delta_F[i] = (fft_delta_F[i][0] * fft_delta_F[i][0] + fft_delta_F[i][1] * fft_delta_F[i][1]) / n_los / n_los;
   }
 
   // Arrange in k-bins
-  for ( int i=0; i<n_bins; i++ ){
+  for (int i = 0; i < n_bins; i++) {
     hist_PS[i] = 0;
-    hist_n[i] = 0;
+    hist_n[i]  = 0;
   }
 
-
-
   int bin_id;
   Real k_val;
-  for ( int i=0; i<n_fft; i++ ){
+  for (int i = 0; i < n_fft; i++) {
     k_val = k_vals[i];
-    if ( k_val == 0 ) continue;
-    bin_id = Locate_Index( k_val, hist_k_edges, n_hist_edges );
-    if ( bin_id < 0 ) chprintf( " %d:   %e    %e   %e \n", bin_id, hist_k_edges[0], k_val, hist_k_edges[1] );
-    if ( bin_id < 0 || bin_id >= n_bins ) continue;
+    if (k_val == 0) continue;
+    bin_id = Locate_Index(k_val, hist_k_edges, n_hist_edges);
+    if (bin_id < 0) chprintf(" %d:   %e    %e   %e \n", bin_id, hist_k_edges[0], k_val, hist_k_edges[1]);
+    if (bin_id < 0 || bin_id >= n_bins) continue;
     hist_PS[bin_id] += fft2_delta_F[i];
-    hist_n[bin_id]  += 1;
+    hist_n[bin_id] += 1;
   }
 
   int hist_sum = 0;
-  for ( int i=0; i<n_bins; i++ ){
+  for (int i = 0; i < n_bins; i++) {
     hist_sum += hist_n[i];
   }
 
   // Add skewer PS to root PS
   Real PS_bin_val;
-  for ( int i=0; i<n_bins; i++ ){
-    if ( hist_n[i] == 0 ) PS_bin_val = 0;
-    else PS_bin_val = hist_PS[i] / hist_n[i] * ( H*L_proper);
+  for (int i = 0; i < n_bins; i++) {
+    if (hist_n[i] == 0)
+      PS_bin_val = 0;
+    else
+      PS_bin_val = hist_PS[i] / hist_n[i] * (H * L_proper);
     ps_root[i] += PS_bin_val;
   }
 
-  if ( hist_sum != n_fft-1 ){
-    printf("ERROR: Histogram sum doesn't match n_pfft:  sum=%d    n_fft=%d \n", hist_sum, n_fft-1);
+  if (hist_sum != n_fft - 1) {
+    printf("ERROR: Histogram sum doesn't match n_pfft:  sum=%d    n_fft=%d \n", hist_sum, n_fft - 1);
     exit(-1);
   }
 
-
-  if ( axis == 0 ) Analysis.n_PS_processed_x += 1;
-  if ( axis == 1 ) Analysis.n_PS_processed_y += 1;
-  if ( axis == 2 ) Analysis.n_PS_processed_z += 1;
-
-
+  if (axis == 0) Analysis.n_PS_processed_x += 1;
+  if (axis == 1) Analysis.n_PS_processed_y += 1;
+  if (axis == 2) Analysis.n_PS_processed_z += 1;
 }
 
-
-void Analysis_Module::Reduce_Power_Spectrum_Axis( int axis ){
-
+void AnalysisModule::Reduce_Power_Spectrum_Axis(int axis)
+{
   int n_root, n_bins;
   Real *ps_root;
   Real *ps_global;
   int *n_axis;
 
-  if ( axis == 0 ){
+  if (axis == 0) {
     n_bins    = n_hist_edges_x - 1;
     n_root    = n_PS_processed_x;
     ps_root   = ps_root_x;
@@ -665,7 +642,7 @@ void Analysis_Module::Reduce_Power_Spectrum_Axis( int axis ){
     n_axis    = &n_PS_axis_x;
   }
 
-  if ( axis == 1 ){
+  if (axis == 1) {
     n_bins    = n_hist_edges_y - 1;
     n_root    = n_PS_processed_y;
     ps_root   = ps_root_y;
@@ -673,7 +650,7 @@ void Analysis_Module::Reduce_Power_Spectrum_Axis( int axis ){
     n_axis    = &n_PS_axis_y;
   }
 
-  if ( axis == 2 ){
+  if (axis == 2) {
     n_bins    = n_hist_edges_z - 1;
     n_root    = n_PS_processed_z;
     ps_root   = ps_root_z;
@@ -681,52 +658,58 @@ void Analysis_Module::Reduce_Power_Spectrum_Axis( int axis ){
     n_axis    = &n_PS_axis_z;
   }
 
-  MPI_Allreduce( ps_root, ps_global, n_bins, MPI_CHREAL, MPI_SUM, world );
-  MPI_Allreduce( &n_root, n_axis, 1, MPI_INT, MPI_SUM, world );
-  // chprintf( "  N_Skewers_Processed: %d \n", *n_axis );  
+  MPI_Allreduce(ps_root, ps_global, n_bins, MPI_CHREAL, MPI_SUM, world);
+  MPI_Allreduce(&n_root, n_axis, 1, MPI_INT, MPI_SUM, world);
+  // chprintf( "  N_Skewers_Processed: %d \n", *n_axis );
 }
 
-
-void Analysis_Module::Reduce_Power_Spectrum_Global( ){
-
+void AnalysisModule::Reduce_Power_Spectrum_Global()
+{
   int n_PS_total = n_PS_axis_x + n_PS_axis_y + n_PS_axis_z;
-  if ( n_hist_edges_x != n_hist_edges_y || n_hist_edges_x != n_hist_edges_z ){
-    chprintf( "ERROR: PS Histogram sizes dont match \n");
+  if (n_hist_edges_x != n_hist_edges_y || n_hist_edges_x != n_hist_edges_z) {
+    chprintf("ERROR: PS Histogram sizes dont match \n");
     exit(-1);
   }
 
   int n_bins = n_hist_edges_x - 1;
 
-  for (int bin_id=0; bin_id<n_bins; bin_id++ ){
-    ps_mean[bin_id] = ( ps_global_x[bin_id] + ps_global_y[bin_id] + ps_global_z[bin_id] ) / n_PS_total;
+  for (int bin_id = 0; bin_id < n_bins; bin_id++) {
+    ps_mean[bin_id] = (ps_global_x[bin_id] + ps_global_y[bin_id] + ps_global_z[bin_id]) / n_PS_total;
   }
 
-  chprintf( " PS Bins: %d     N_Skewers_Processed: %d \n", n_bins, n_PS_total );
+  chprintf(" PS Bins: %d     N_Skewers_Processed: %d \n", n_bins, n_PS_total);
 
   // for (int bin_id=0; bin_id<n_bins; bin_id++ ){
-  //   chprintf( " %e   %e  \n", k_centers[bin_id], ps_mean[bin_id] *k_centers[bin_id] / M_PI);  
-  // }  
+  //   chprintf( " %e   %e  \n", k_centers[bin_id], ps_mean[bin_id]
+  //   *k_centers[bin_id] / M_PI);
+  // }
 }
 
-
-void Analysis_Module::Reduce_Lya_Mean_Flux_Global(){
-   
+void AnalysisModule::Reduce_Lya_Mean_Flux_Global()
+{
   n_skewers_processed = n_skewers_processed_x + n_skewers_processed_y + n_skewers_processed_z;
-  Flux_mean_HI   = ( Flux_mean_HI_x  *n_skewers_processed_x + Flux_mean_HI_y  *n_skewers_processed_y + Flux_mean_HI_z  *n_skewers_processed_z  ) / n_skewers_processed;;
-  Flux_mean_HeII = ( Flux_mean_HeII_x*n_skewers_processed_x + Flux_mean_HeII_y*n_skewers_processed_y + Flux_mean_HeII_z*n_skewers_processed_z  ) / n_skewers_processed;;
-  chprintf( " N_Skewers_Processed  Global: %d    F_Mean_HI: %e  F_Mean_HeII: %e\n", n_skewers_processed, Flux_mean_HI, Flux_mean_HeII  );
+  Flux_mean_HI        = (Flux_mean_HI_x * n_skewers_processed_x + Flux_mean_HI_y * n_skewers_processed_y +
+                  Flux_mean_HI_z * n_skewers_processed_z) /
+                 n_skewers_processed;
+  ;
+  Flux_mean_HeII = (Flux_mean_HeII_x * n_skewers_processed_x + Flux_mean_HeII_y * n_skewers_processed_y +
+                    Flux_mean_HeII_z * n_skewers_processed_z) /
+                   n_skewers_processed;
+  ;
+  chprintf(" N_Skewers_Processed  Global: %d    F_Mean_HI: %e  F_Mean_HeII: %e\n", n_skewers_processed, Flux_mean_HI,
+           Flux_mean_HeII);
 }
 
-void Analysis_Module::Reduce_Lya_Mean_Flux_Axis( int axis ){
-
-  int  *n_skewers_processed;
-  int  *n_skewers_processed_root;
+void AnalysisModule::Reduce_Lya_Mean_Flux_Axis(int axis)
+{
+  int *n_skewers_processed;
+  int *n_skewers_processed_root;
   Real *Flux_mean_HI;
   Real *Flux_mean_HeII;
   Real *Flux_mean_root_HI;
   Real *Flux_mean_root_HeII;
-  
-  if ( axis == 0 ){
+
+  if (axis == 0) {
     n_skewers_processed      = &n_skewers_processed_x;
     n_skewers_processed_root = &n_skewers_processed_root_x;
     Flux_mean_HI             = &Flux_mean_HI_x;
@@ -735,7 +718,7 @@ void Analysis_Module::Reduce_Lya_Mean_Flux_Axis( int axis ){
     Flux_mean_root_HeII      = &Flux_mean_root_HeII_x;
   }
 
-  if ( axis == 1 ){
+  if (axis == 1) {
     n_skewers_processed      = &n_skewers_processed_y;
     n_skewers_processed_root = &n_skewers_processed_root_y;
     Flux_mean_HI             = &Flux_mean_HI_y;
@@ -744,7 +727,7 @@ void Analysis_Module::Reduce_Lya_Mean_Flux_Axis( int axis ){
     Flux_mean_root_HeII      = &Flux_mean_root_HeII_y;
   }
 
-  if ( axis == 2 ){
+  if (axis == 2) {
     n_skewers_processed      = &n_skewers_processed_z;
     n_skewers_processed_root = &n_skewers_processed_root_z;
     Flux_mean_HI             = &Flux_mean_HI_z;
@@ -753,120 +736,114 @@ void Analysis_Module::Reduce_Lya_Mean_Flux_Axis( int axis ){
     Flux_mean_root_HeII      = &Flux_mean_root_HeII_z;
   }
 
+    #ifdef MPI_CHOLLA
 
-
-  #ifdef MPI_CHOLLA
-
-  #ifdef PRINT_ANALYSIS_LOG
-  for ( int i=0; i<nproc; i++ ){
-    if (procID == i) printf("   procID:%d   Flux_HI_Sum: %e     N_Skewers_Processed: %d \n", procID, (*Flux_mean_root_HI), *n_skewers_processed_root );
+      #ifdef PRINT_ANALYSIS_LOG
+  for (int i = 0; i < nproc; i++) {
+    if (procID == i)
+      printf("   procID:%d   Flux_HI_Sum: %e     N_Skewers_Processed: %d \n", procID, (*Flux_mean_root_HI),
+             *n_skewers_processed_root);
     MPI_Barrier(world);
     sleep(1);
   }
-  #endif
-  
-  MPI_Allreduce( Flux_mean_root_HI,   Flux_mean_HI,   1, MPI_CHREAL, MPI_SUM, world );
-  MPI_Allreduce( Flux_mean_root_HeII, Flux_mean_HeII, 1, MPI_CHREAL, MPI_SUM, world );
-  MPI_Allreduce( n_skewers_processed_root, n_skewers_processed, 1, MPI_INT, MPI_SUM, world );
-
-  #else
-  
-  *Flux_mean_HI   = *Flux_mean_root_HI;
-  *Flux_mean_HeII = *Flux_mean_root_HeII;
-  *n_skewers_processed = *n_skewers_processed_root;
+      #endif
 
-  #endif
-  
-  *Flux_mean_HI   = *Flux_mean_HI   / *n_skewers_processed;
-  *Flux_mean_HeII = *Flux_mean_HeII / *n_skewers_processed;
-  chprintf( "  N_Skewers_Processed: %d  Flux Mean HI: %e  Flux_mean_HeII: %e \n", *n_skewers_processed, *Flux_mean_HI, *Flux_mean_HeII );
+  MPI_Allreduce(Flux_mean_root_HI, Flux_mean_HI, 1, MPI_CHREAL, MPI_SUM, world);
+  MPI_Allreduce(Flux_mean_root_HeII, Flux_mean_HeII, 1, MPI_CHREAL, MPI_SUM, world);
+  MPI_Allreduce(n_skewers_processed_root, n_skewers_processed, 1, MPI_INT, MPI_SUM, world);
 
+    #else
 
-}
+  *Flux_mean_HI        = *Flux_mean_root_HI;
+  *Flux_mean_HeII      = *Flux_mean_root_HeII;
+  *n_skewers_processed = *n_skewers_processed_root;
 
+    #endif
 
-void Analysis_Module::Compute_Lya_Mean_Flux_Skewer( int skewer_id, int axis ){
+  *Flux_mean_HI   = *Flux_mean_HI / *n_skewers_processed;
+  *Flux_mean_HeII = *Flux_mean_HeII / *n_skewers_processed;
+  chprintf("  N_Skewers_Processed: %d  Flux Mean HI: %e  Flux_mean_HeII: %e \n", *n_skewers_processed, *Flux_mean_HI,
+           *Flux_mean_HeII);
+}
 
+void AnalysisModule::Compute_Lya_Mean_Flux_Skewer(int skewer_id, int axis)
+{
   bool am_I_root;
-  int  n_los;
-  int  *n_skewers_processed_root;
+  int n_los;
+  int *n_skewers_processed_root;
   Real *F_mean_root_HI;
   Real *F_mean_root_HeII;
   Real *skewers_transmitted_flux_HI;
   Real *skewers_transmitted_flux_HeII;
-  
-  if ( axis == 0 ){
-    am_I_root = am_I_root_x;
-    n_los = nx_total;
-    F_mean_root_HI   = &Flux_mean_root_HI_x;
-    F_mean_root_HeII = &Flux_mean_root_HeII_x;
+
+  if (axis == 0) {
+    am_I_root                     = am_I_root_x;
+    n_los                         = nx_total;
+    F_mean_root_HI                = &Flux_mean_root_HI_x;
+    F_mean_root_HeII              = &Flux_mean_root_HeII_x;
     skewers_transmitted_flux_HI   = skewers_transmitted_flux_HI_x;
     skewers_transmitted_flux_HeII = skewers_transmitted_flux_HeII_x;
-    n_skewers_processed_root = &n_skewers_processed_root_x; 
+    n_skewers_processed_root      = &n_skewers_processed_root_x;
   }
 
-  if ( axis == 1 ){
-    am_I_root = am_I_root_y;
-    n_los = ny_total;
-    F_mean_root_HI   = &Flux_mean_root_HI_y;
-    F_mean_root_HeII = &Flux_mean_root_HeII_y;
+  if (axis == 1) {
+    am_I_root                     = am_I_root_y;
+    n_los                         = ny_total;
+    F_mean_root_HI                = &Flux_mean_root_HI_y;
+    F_mean_root_HeII              = &Flux_mean_root_HeII_y;
     skewers_transmitted_flux_HI   = skewers_transmitted_flux_HI_y;
     skewers_transmitted_flux_HeII = skewers_transmitted_flux_HeII_y;
-    n_skewers_processed_root = &n_skewers_processed_root_y;
+    n_skewers_processed_root      = &n_skewers_processed_root_y;
   }
 
-  if ( axis == 2 ){
-    am_I_root = am_I_root_z;
-    n_los = nz_total;
-    F_mean_root_HI   = &Flux_mean_root_HI_z;
-    F_mean_root_HeII = &Flux_mean_root_HeII_z;
+  if (axis == 2) {
+    am_I_root                     = am_I_root_z;
+    n_los                         = nz_total;
+    F_mean_root_HI                = &Flux_mean_root_HI_z;
+    F_mean_root_HeII              = &Flux_mean_root_HeII_z;
     skewers_transmitted_flux_HI   = skewers_transmitted_flux_HI_z;
     skewers_transmitted_flux_HeII = skewers_transmitted_flux_HeII_z;
-    n_skewers_processed_root = &n_skewers_processed_root_z;
+    n_skewers_processed_root      = &n_skewers_processed_root_z;
   }
 
-  if ( !am_I_root ) return;
-  
+  if (!am_I_root) return;
+
   Real F_mean_HI, F_mean_HeII;
   F_mean_HI   = 0;
   F_mean_HeII = 0;
-  for ( int los_id=0; los_id<n_los; los_id++ ){
-    F_mean_HI += skewers_transmitted_flux_HI  [ skewer_id*n_los + los_id] / n_los;
-    F_mean_HeII += skewers_transmitted_flux_HeII[ skewer_id*n_los + los_id] / n_los;
+  for (int los_id = 0; los_id < n_los; los_id++) {
+    F_mean_HI += skewers_transmitted_flux_HI[skewer_id * n_los + los_id] / n_los;
+    F_mean_HeII += skewers_transmitted_flux_HeII[skewer_id * n_los + los_id] / n_los;
   }
-  
-  *F_mean_root_HI   += F_mean_HI;
+
+  *F_mean_root_HI += F_mean_HI;
   *F_mean_root_HeII += F_mean_HeII;
   *n_skewers_processed_root += 1;
-
 }
 
-
-
-void Analysis_Module::Initialize_Lya_Statistics_Measurements( int axis ){
-
-  if ( axis == 0 ){
+void AnalysisModule::Initialize_Lya_Statistics_Measurements(int axis)
+{
+  if (axis == 0) {
     n_skewers_processed_root_x = 0;
-    Flux_mean_root_HI_x = 0;
-    Flux_mean_root_HeII_x = 0;
+    Flux_mean_root_HI_x        = 0;
+    Flux_mean_root_HeII_x      = 0;
   }
 
-  if ( axis == 1 ){
+  if (axis == 1) {
     n_skewers_processed_root_y = 0;
-    Flux_mean_root_HI_y = 0;
-    Flux_mean_root_HeII_y = 0;
+    Flux_mean_root_HI_y        = 0;
+    Flux_mean_root_HeII_y      = 0;
   }
 
-  if ( axis == 2 ){
+  if (axis == 2) {
     n_skewers_processed_root_z = 0;
-    Flux_mean_root_HI_z = 0;
-    Flux_mean_root_HeII_z = 0;
+    Flux_mean_root_HI_z        = 0;
+    Flux_mean_root_HeII_z      = 0;
   }
-
 }
 
-void Grid3D::Compute_Transmitted_Flux_Skewer( int skewer_id, int axis ){
-  
+void Grid3D::Compute_Transmitted_Flux_Skewer(int skewer_id, int axis)
+{
   int n_los_full, n_los_total, n_ghost;
   bool am_I_root;
   Real *full_density_HI;
@@ -886,184 +863,178 @@ void Grid3D::Compute_Transmitted_Flux_Skewer( int skewer_id, int axis ){
 
   n_ghost = Analysis.n_ghost_skewer;
 
-
-  if ( axis == 0 ){
-    Lbox    = Analysis.Lbox_x;
-    delta_x = Analysis.dx;
-    am_I_root   = Analysis.am_I_root_x;
-    n_los_full  = Analysis.n_los_full_x;
-    n_los_total = Analysis.nx_total;
-    full_density_HI           = Analysis.full_HI_density_x;
-    full_density_HeII         = Analysis.full_HeII_density_x;
-    full_velocity             = Analysis.full_velocity_x;
-    full_temperature          = Analysis.full_temperature_x;
-    full_optical_depth_HI     = Analysis.full_optical_depth_HI_x;
-    full_optical_depth_HeII   = Analysis.full_optical_depth_HeII_x;
-    full_vel_Hubble           = Analysis.full_vel_Hubble_x;
-    skewers_HI_density_root   = Analysis.skewers_HI_density_root_x;
-    skewers_HeII_density_root = Analysis.skewers_HeII_density_root_x;
-    skewers_velocity_root     = Analysis.skewers_velocity_root_x; 
-    skewers_temperature_root  = Analysis.skewers_temperature_root_x; 
+  if (axis == 0) {
+    Lbox                          = Analysis.Lbox_x;
+    delta_x                       = Analysis.dx;
+    am_I_root                     = Analysis.am_I_root_x;
+    n_los_full                    = Analysis.n_los_full_x;
+    n_los_total                   = Analysis.nx_total;
+    full_density_HI               = Analysis.full_HI_density_x;
+    full_density_HeII             = Analysis.full_HeII_density_x;
+    full_velocity                 = Analysis.full_velocity_x;
+    full_temperature              = Analysis.full_temperature_x;
+    full_optical_depth_HI         = Analysis.full_optical_depth_HI_x;
+    full_optical_depth_HeII       = Analysis.full_optical_depth_HeII_x;
+    full_vel_Hubble               = Analysis.full_vel_Hubble_x;
+    skewers_HI_density_root       = Analysis.skewers_HI_density_root_x;
+    skewers_HeII_density_root     = Analysis.skewers_HeII_density_root_x;
+    skewers_velocity_root         = Analysis.skewers_velocity_root_x;
+    skewers_temperature_root      = Analysis.skewers_temperature_root_x;
     skewers_transmitted_flux_HI   = Analysis.skewers_transmitted_flux_HI_x;
     skewers_transmitted_flux_HeII = Analysis.skewers_transmitted_flux_HeII_x;
   }
 
-  if ( axis == 1 ){
-    Lbox    = Analysis.Lbox_y;
-    delta_x = Analysis.dy;
-    am_I_root   = Analysis.am_I_root_y;
-    n_los_full  = Analysis.n_los_full_y;
-    n_los_total = Analysis.ny_total;
-    full_density_HI           = Analysis.full_HI_density_y;
-    full_density_HeII         = Analysis.full_HeII_density_y;
-    full_density_HI           = Analysis.full_HI_density_y;
-    full_density_HeII         = Analysis.full_HeII_density_y;
-    full_velocity             = Analysis.full_velocity_y;
-    full_temperature          = Analysis.full_temperature_y;
-    full_optical_depth_HI     = Analysis.full_optical_depth_HI_y;
-    full_optical_depth_HeII   = Analysis.full_optical_depth_HeII_y;
-    full_vel_Hubble           = Analysis.full_vel_Hubble_y;
-    skewers_HI_density_root   = Analysis.skewers_HI_density_root_y;
-    skewers_HeII_density_root = Analysis.skewers_HeII_density_root_y;
-    skewers_velocity_root     = Analysis.skewers_velocity_root_y;
-    skewers_temperature_root  = Analysis.skewers_temperature_root_y;
+  if (axis == 1) {
+    Lbox                          = Analysis.Lbox_y;
+    delta_x                       = Analysis.dy;
+    am_I_root                     = Analysis.am_I_root_y;
+    n_los_full                    = Analysis.n_los_full_y;
+    n_los_total                   = Analysis.ny_total;
+    full_density_HI               = Analysis.full_HI_density_y;
+    full_density_HeII             = Analysis.full_HeII_density_y;
+    full_density_HI               = Analysis.full_HI_density_y;
+    full_density_HeII             = Analysis.full_HeII_density_y;
+    full_velocity                 = Analysis.full_velocity_y;
+    full_temperature              = Analysis.full_temperature_y;
+    full_optical_depth_HI         = Analysis.full_optical_depth_HI_y;
+    full_optical_depth_HeII       = Analysis.full_optical_depth_HeII_y;
+    full_vel_Hubble               = Analysis.full_vel_Hubble_y;
+    skewers_HI_density_root       = Analysis.skewers_HI_density_root_y;
+    skewers_HeII_density_root     = Analysis.skewers_HeII_density_root_y;
+    skewers_velocity_root         = Analysis.skewers_velocity_root_y;
+    skewers_temperature_root      = Analysis.skewers_temperature_root_y;
     skewers_transmitted_flux_HI   = Analysis.skewers_transmitted_flux_HI_y;
     skewers_transmitted_flux_HeII = Analysis.skewers_transmitted_flux_HeII_y;
   }
 
-  if ( axis == 2 ){
-    Lbox    = Analysis.Lbox_z;
-    delta_x = Analysis.dz;
-    am_I_root   = Analysis.am_I_root_z;
-    n_los_full  = Analysis.n_los_full_z;
-    n_los_total = Analysis.nz_total;
-    full_density_HI           = Analysis.full_HI_density_z;
-    full_density_HeII         = Analysis.full_HeII_density_z;
-    full_velocity             = Analysis.full_velocity_z;
-    full_temperature          = Analysis.full_temperature_z;
-    full_optical_depth_HI     = Analysis.full_optical_depth_HI_z;
-    full_optical_depth_HeII   = Analysis.full_optical_depth_HeII_z;
-    full_vel_Hubble           = Analysis.full_vel_Hubble_z;
-    skewers_HI_density_root   = Analysis.skewers_HI_density_root_z;
-    skewers_HeII_density_root = Analysis.skewers_HeII_density_root_z;
-    skewers_velocity_root     = Analysis.skewers_velocity_root_z;
-    skewers_temperature_root  = Analysis.skewers_temperature_root_z;
+  if (axis == 2) {
+    Lbox                          = Analysis.Lbox_z;
+    delta_x                       = Analysis.dz;
+    am_I_root                     = Analysis.am_I_root_z;
+    n_los_full                    = Analysis.n_los_full_z;
+    n_los_total                   = Analysis.nz_total;
+    full_density_HI               = Analysis.full_HI_density_z;
+    full_density_HeII             = Analysis.full_HeII_density_z;
+    full_velocity                 = Analysis.full_velocity_z;
+    full_temperature              = Analysis.full_temperature_z;
+    full_optical_depth_HI         = Analysis.full_optical_depth_HI_z;
+    full_optical_depth_HeII       = Analysis.full_optical_depth_HeII_z;
+    full_vel_Hubble               = Analysis.full_vel_Hubble_z;
+    skewers_HI_density_root       = Analysis.skewers_HI_density_root_z;
+    skewers_HeII_density_root     = Analysis.skewers_HeII_density_root_z;
+    skewers_velocity_root         = Analysis.skewers_velocity_root_z;
+    skewers_temperature_root      = Analysis.skewers_temperature_root_z;
     skewers_transmitted_flux_HI   = Analysis.skewers_transmitted_flux_HI_z;
     skewers_transmitted_flux_HeII = Analysis.skewers_transmitted_flux_HeII_z;
   }
 
-  if ( !am_I_root ) return;
+  if (!am_I_root) return;
 
   // printf( "  Computing Skewer ID: %d \n", skewer_id );
-  
-  Real density_HI, density_HeII, velocity, temperature, Msun, kpc, Mp, kpc3, Me, e_charge, c, Kb;
-  
-  // Constants in CGS
-  Kb   = 1.38064852e-16; //g (cm/s)^2 K-1
-  Msun = 1.98847e33;     //g
-  Mp   = 1.6726219e-24;  //g
-  Me   = 9.10938356e-28; //g
-  c    = 2.99792458e10;    //cm/s
-  kpc  = 3.0857e21;        //cm
-  kpc3 = kpc * kpc * kpc;
-  e_charge =  4.8032e-10;  // cm^3/2 g^1/2 s^-1
 
+  Real density_HI, density_HeII, velocity, temperature, Msun, kpc, Mp, kpc3, Me, e_charge, c, Kb;
 
+  // Constants in CGS
+  Kb       = 1.38064852e-16;  // g (cm/s)^2 K-1
+  Msun     = 1.98847e33;      // g
+  Mp       = 1.6726219e-24;   // g
+  Me       = 9.10938356e-28;  // g
+  c        = 2.99792458e10;   // cm/s
+  kpc      = 3.0857e21;       // cm
+  kpc3     = kpc * kpc * kpc;
+  e_charge = 4.8032e-10;  // cm^3/2 g^1/2 s^-1
 
   // Fill the Real cells first
-  for (int los_id=0; los_id<n_los_total; los_id++ ){
-    density_HI   = skewers_HI_density_root   [ skewer_id*n_los_total + los_id ];
-    density_HeII = skewers_HeII_density_root [ skewer_id*n_los_total + los_id ];
-    velocity     = skewers_velocity_root     [ skewer_id*n_los_total + los_id ];
-    temperature  = skewers_temperature_root  [ skewer_id*n_los_total + los_id ];
-    full_density_HI   [los_id + n_ghost] = density_HI;
-    full_density_HeII [los_id + n_ghost] = density_HeII;
-    full_velocity     [los_id + n_ghost] = velocity;
-    full_temperature  [los_id + n_ghost] = temperature;
+  for (int los_id = 0; los_id < n_los_total; los_id++) {
+    density_HI                          = skewers_HI_density_root[skewer_id * n_los_total + los_id];
+    density_HeII                        = skewers_HeII_density_root[skewer_id * n_los_total + los_id];
+    velocity                            = skewers_velocity_root[skewer_id * n_los_total + los_id];
+    temperature                         = skewers_temperature_root[skewer_id * n_los_total + los_id];
+    full_density_HI[los_id + n_ghost]   = density_HI;
+    full_density_HeII[los_id + n_ghost] = density_HeII;
+    full_velocity[los_id + n_ghost]     = velocity;
+    full_temperature[los_id + n_ghost]  = temperature;
   }
 
   // Fill the ghost cells
-  for ( int los_id=0; los_id<n_ghost; los_id++ ){
-    full_density_HI  [los_id] = full_density_HI  [n_los_total+los_id];
-    full_density_HeII[los_id] = full_density_HeII[n_los_total+los_id];
-    full_velocity    [los_id] = full_velocity    [n_los_total+los_id];
-    full_temperature [los_id] = full_temperature [n_los_total+los_id];
-    full_density_HI  [n_los_total+n_ghost+los_id] = full_density_HI  [n_ghost+los_id];
-    full_density_HeII[n_los_total+n_ghost+los_id] = full_density_HeII[n_ghost+los_id];
-    full_velocity    [n_los_total+n_ghost+los_id] = full_velocity    [n_ghost+los_id];
-    full_temperature [n_los_total+n_ghost+los_id] = full_temperature [n_ghost+los_id];
-  } 
-  
+  for (int los_id = 0; los_id < n_ghost; los_id++) {
+    full_density_HI[los_id]                           = full_density_HI[n_los_total + los_id];
+    full_density_HeII[los_id]                         = full_density_HeII[n_los_total + los_id];
+    full_velocity[los_id]                             = full_velocity[n_los_total + los_id];
+    full_temperature[los_id]                          = full_temperature[n_los_total + los_id];
+    full_density_HI[n_los_total + n_ghost + los_id]   = full_density_HI[n_ghost + los_id];
+    full_density_HeII[n_los_total + n_ghost + los_id] = full_density_HeII[n_ghost + los_id];
+    full_velocity[n_los_total + n_ghost + los_id]     = full_velocity[n_ghost + los_id];
+    full_temperature[n_los_total + n_ghost + los_id]  = full_temperature[n_ghost + los_id];
+  }
 
   Real dens_factor, dens_factor_HI, dens_factor_HeII, vel_factor;
-  dens_factor = 1. / ( Cosmo.current_a * Cosmo.current_a * Cosmo.current_a ) * Cosmo.cosmo_h * Cosmo.cosmo_h; 
-  dens_factor_HI   = dens_factor * Msun / ( kpc3 ) / Mp;
-  dens_factor_HeII = dens_factor * Msun / ( kpc3 ) / (4*Mp);
-  vel_factor = 1e5; //cm/s
+  dens_factor      = 1. / (Cosmo.current_a * Cosmo.current_a * Cosmo.current_a) * Cosmo.cosmo_h * Cosmo.cosmo_h;
+  dens_factor_HI   = dens_factor * Msun / (kpc3) / Mp;
+  dens_factor_HeII = dens_factor * Msun / (kpc3) / (4 * Mp);
+  vel_factor       = 1e5;  // cm/s
 
   // Get Cosmological variables
-  Real H, current_a, L_proper, dx_proper, dv_Hubble; 
+  Real H, current_a, L_proper, dx_proper, dv_Hubble;
   Real H_cgs, Lya_lambda_HI, f_12, Lya_sigma_HI;
   Real Lya_lambda_HeII, Lya_sigma_HeII;
   current_a = Cosmo.current_a;
-  L_proper = Lbox * current_a / Cosmo.cosmo_h;
+  L_proper  = Lbox * current_a / Cosmo.cosmo_h;
   dx_proper = delta_x * current_a / Cosmo.cosmo_h;
-  H = Cosmo.Get_Hubble_Parameter( current_a );
-  dv_Hubble = H * dx_proper * vel_factor; // cm/s
-  
+  H         = Cosmo.Get_Hubble_Parameter(current_a);
+  dv_Hubble = H * dx_proper * vel_factor;  // cm/s
+
   // Fill the Hubble velocity with ghost cells
-  for ( int los_id=0; los_id<n_los_full; los_id++ ){
-    full_vel_Hubble[los_id] = ( los_id - n_ghost + 0.5 ) * dv_Hubble;
-  }
-  
-  Lya_lambda_HI   = 1.21567e-5;          // cm  Rest wave length of the Lyman Alpha Transition Hydrogen
-  Lya_lambda_HeII = Lya_lambda_HI / 4;   // cm  Rest wave length of the Lyman Alpha Transition Helium II
-  f_12 =  0.416;                         // Lya transition Oscillator strength
-  H_cgs = H * 1e5 / kpc;
-  
-  Lya_sigma_HI   = M_PI * e_charge * e_charge / Me / c * Lya_lambda_HI   * f_12 / H_cgs;
-  Lya_sigma_HeII = M_PI * e_charge * e_charge / Me / c * Lya_lambda_HeII * f_12 / H_cgs;   
-    
-  //Compute the optical depth
-  Real b_HI_j,   n_HI_j; 
+  for (int los_id = 0; los_id < n_los_full; los_id++) {
+    full_vel_Hubble[los_id] = (los_id - n_ghost + 0.5) * dv_Hubble;
+  }
+
+  Lya_lambda_HI = 1.21567e-5;           // cm  Rest wave length of the Lyman Alpha
+                                        // Transition Hydrogen
+  Lya_lambda_HeII = Lya_lambda_HI / 4;  // cm  Rest wave length of the Lyman Alpha Transition Helium II
+  f_12            = 0.416;              // Lya transition Oscillator strength
+  H_cgs           = H * 1e5 / kpc;
+
+  Lya_sigma_HI   = M_PI * e_charge * e_charge / Me / c * Lya_lambda_HI * f_12 / H_cgs;
+  Lya_sigma_HeII = M_PI * e_charge * e_charge / Me / c * Lya_lambda_HeII * f_12 / H_cgs;
+
+  // Compute the optical depth
+  Real b_HI_j, n_HI_j;
   Real b_HeII_j, n_HeII_j;
-  Real tau_HI_i, tau_HeII_i; 
+  Real tau_HI_i, tau_HeII_i;
   Real vel_i, vel_j, y_l, y_r;
-  for ( int i=0; i<n_los_full; i++ ){
-    vel_i = full_vel_Hubble[i];
-    tau_HI_i = 0;
+  for (int i = 0; i < n_los_full; i++) {
+    vel_i      = full_vel_Hubble[i];
+    tau_HI_i   = 0;
     tau_HeII_i = 0;
-    for ( int j=0; j<n_los_full; j++ ){
-      n_HI_j   = full_density_HI[j]   * dens_factor_HI;
+    for (int j = 0; j < n_los_full; j++) {
+      n_HI_j   = full_density_HI[j] * dens_factor_HI;
       n_HeII_j = full_density_HeII[j] * dens_factor_HeII;
-      vel_j    =  full_vel_Hubble[j] + ( full_velocity[j] * vel_factor );
-      b_HI_j   = sqrt( 2 * Kb / Mp     * full_temperature[j] );
-      b_HeII_j = sqrt( 2 * Kb / (4*Mp) * full_temperature[j] );
-      y_l = ( vel_i - 0.5*dv_Hubble - vel_j ) / b_HI_j;
-      y_r = ( vel_i + 0.5*dv_Hubble - vel_j ) / b_HI_j;
-      tau_HI_i   += n_HI_j * ( erf(y_r) - erf(y_l) ) / 2;
-      y_l = ( vel_i - 0.5*dv_Hubble - vel_j ) / b_HeII_j;
-      y_r = ( vel_i + 0.5*dv_Hubble - vel_j ) / b_HeII_j;
-      tau_HeII_i += n_HeII_j * ( erf(y_r) - erf(y_l) ) / 2;
+      vel_j    = full_vel_Hubble[j] + (full_velocity[j] * vel_factor);
+      b_HI_j   = sqrt(2 * Kb / Mp * full_temperature[j]);
+      b_HeII_j = sqrt(2 * Kb / (4 * Mp) * full_temperature[j]);
+      y_l      = (vel_i - 0.5 * dv_Hubble - vel_j) / b_HI_j;
+      y_r      = (vel_i + 0.5 * dv_Hubble - vel_j) / b_HI_j;
+      tau_HI_i += n_HI_j * (erf(y_r) - erf(y_l)) / 2;
+      y_l = (vel_i - 0.5 * dv_Hubble - vel_j) / b_HeII_j;
+      y_r = (vel_i + 0.5 * dv_Hubble - vel_j) / b_HeII_j;
+      tau_HeII_i += n_HeII_j * (erf(y_r) - erf(y_l)) / 2;
     }
-    tau_HI_i   *= Lya_sigma_HI;
+    tau_HI_i *= Lya_sigma_HI;
     tau_HeII_i *= Lya_sigma_HeII;
     full_optical_depth_HI[i]   = tau_HI_i;
-    full_optical_depth_HeII[i] = tau_HeII_i;    
+    full_optical_depth_HeII[i] = tau_HeII_i;
   }
 
   // Compute the transmitted_flux
-  for ( int los_id=0; los_id<n_los_total; los_id++ ){
-    skewers_transmitted_flux_HI  [skewer_id*n_los_total + los_id] = exp( -full_optical_depth_HI  [los_id + n_ghost] );
-    skewers_transmitted_flux_HeII[skewer_id*n_los_total + los_id] = exp( -full_optical_depth_HeII[los_id + n_ghost] );
+  for (int los_id = 0; los_id < n_los_total; los_id++) {
+    skewers_transmitted_flux_HI[skewer_id * n_los_total + los_id]   = exp(-full_optical_depth_HI[los_id + n_ghost]);
+    skewers_transmitted_flux_HeII[skewer_id * n_los_total + los_id] = exp(-full_optical_depth_HeII[los_id + n_ghost]);
   }
-
-
-
 }
 
-void Analysis_Module::Transfer_Skewers_Data( int axis ){
-
+void AnalysisModule::Transfer_Skewers_Data(int axis)
+{
   bool am_I_root;
   int n_skewers, n_los_local, n_los_total, root_id;
   Real *skewers_HI_density_local;
@@ -1074,191 +1045,185 @@ void Analysis_Module::Transfer_Skewers_Data( int axis ){
   Real *skewers_HeII_density_root;
   Real *skewers_velocity_root;
   Real *skewers_temperature_root;
-  
-  #ifdef OUTPUT_SKEWERS
+
+    #ifdef OUTPUT_SKEWERS
   Real *skewers_density_local;
   Real *skewers_density_root;
-  #endif
-  
-  #ifdef MPI_CHOLLA
+    #endif
+
+    #ifdef MPI_CHOLLA
   vector<int> mpi_indices;
   MPI_Status mpi_status;
 
-  #endif
+    #endif
 
-  if ( axis == 0 ){
-    root_id = root_id_x;
-    am_I_root = am_I_root_x;
-    n_los_local = nx_local;
-    n_los_total = nx_total;
-    n_skewers = n_skewers_local_x;
-    skewers_HI_density_local = skewers_HI_density_local_x;
-    skewers_HI_density_root = skewers_HI_density_root_x;
+  if (axis == 0) {
+    root_id                    = root_id_x;
+    am_I_root                  = am_I_root_x;
+    n_los_local                = nx_local;
+    n_los_total                = nx_total;
+    n_skewers                  = n_skewers_local_x;
+    skewers_HI_density_local   = skewers_HI_density_local_x;
+    skewers_HI_density_root    = skewers_HI_density_root_x;
     skewers_HeII_density_local = skewers_HeII_density_local_x;
     skewers_HeII_density_root  = skewers_HeII_density_root_x;
-    skewers_velocity_local = skewers_velocity_local_x;
-    skewers_temperature_local = skewers_temperature_local_x;
-    skewers_velocity_root = skewers_velocity_root_x;
-    skewers_temperature_root = skewers_temperature_root_x;
+    skewers_velocity_local     = skewers_velocity_local_x;
+    skewers_temperature_local  = skewers_temperature_local_x;
+    skewers_velocity_root      = skewers_velocity_root_x;
+    skewers_temperature_root   = skewers_temperature_root_x;
     #ifdef MPI_CHOLLA
     mpi_indices = mpi_indices_x;
     #endif
     #ifdef OUTPUT_SKEWERS
-    skewers_density_root = skewers_density_root_x;
+    skewers_density_root  = skewers_density_root_x;
     skewers_density_local = skewers_density_local_x;
     #endif
   }
 
-
-  if ( axis == 1 ){
-    root_id = root_id_y;
-    am_I_root = am_I_root_y;
-    n_los_local = ny_local;
-    n_los_total = ny_total;
-    n_skewers = n_skewers_local_y;
-    skewers_HI_density_local = skewers_HI_density_local_y;
+  if (axis == 1) {
+    root_id                    = root_id_y;
+    am_I_root                  = am_I_root_y;
+    n_los_local                = ny_local;
+    n_los_total                = ny_total;
+    n_skewers                  = n_skewers_local_y;
+    skewers_HI_density_local   = skewers_HI_density_local_y;
     skewers_HeII_density_local = skewers_HeII_density_local_y;
     skewers_HeII_density_root  = skewers_HeII_density_root_y;
-    skewers_velocity_local = skewers_velocity_local_y;
-    skewers_temperature_local = skewers_temperature_local_y;
-    skewers_HI_density_root = skewers_HI_density_root_y;
-    skewers_velocity_root = skewers_velocity_root_y;
-    skewers_temperature_root = skewers_temperature_root_y;
+    skewers_velocity_local     = skewers_velocity_local_y;
+    skewers_temperature_local  = skewers_temperature_local_y;
+    skewers_HI_density_root    = skewers_HI_density_root_y;
+    skewers_velocity_root      = skewers_velocity_root_y;
+    skewers_temperature_root   = skewers_temperature_root_y;
     #ifdef MPI_CHOLLA
     mpi_indices = mpi_indices_y;
     #endif
     #ifdef OUTPUT_SKEWERS
-    skewers_density_root = skewers_density_root_y;
+    skewers_density_root  = skewers_density_root_y;
     skewers_density_local = skewers_density_local_y;
     #endif
   }
 
-
-  if ( axis == 2 ){
-    root_id = root_id_z;
-    am_I_root = am_I_root_z;
-    n_los_local = nz_local;
-    n_los_total = nz_total;
-    n_skewers = n_skewers_local_z;
-    skewers_HI_density_local = skewers_HI_density_local_z;
+  if (axis == 2) {
+    root_id                    = root_id_z;
+    am_I_root                  = am_I_root_z;
+    n_los_local                = nz_local;
+    n_los_total                = nz_total;
+    n_skewers                  = n_skewers_local_z;
+    skewers_HI_density_local   = skewers_HI_density_local_z;
     skewers_HeII_density_local = skewers_HeII_density_local_z;
     skewers_HeII_density_root  = skewers_HeII_density_root_z;
-    skewers_velocity_local = skewers_velocity_local_z;
-    skewers_temperature_local = skewers_temperature_local_z;
-    skewers_HI_density_root = skewers_HI_density_root_z;
-    skewers_velocity_root = skewers_velocity_root_z;
-    skewers_temperature_root = skewers_temperature_root_z;
+    skewers_velocity_local     = skewers_velocity_local_z;
+    skewers_temperature_local  = skewers_temperature_local_z;
+    skewers_HI_density_root    = skewers_HI_density_root_z;
+    skewers_velocity_root      = skewers_velocity_root_z;
+    skewers_temperature_root   = skewers_temperature_root_z;
     #ifdef MPI_CHOLLA
     mpi_indices = mpi_indices_z;
     #endif
     #ifdef OUTPUT_SKEWERS
-    skewers_density_root = skewers_density_root_z;
+    skewers_density_root  = skewers_density_root_z;
     skewers_density_local = skewers_density_local_z;
     #endif
   }
 
-
   // Copy Skewers Local Data to Root data
 
   Real HI_density, HeII_density, velocity, temperature;
   Real density;
-  
-  #ifdef MPI_CHOLLA
-  if ( am_I_root ){
 
-    if ( root_id != procID ){
-      printf("ERROR: Root ID doesn't match procID\n" );
+    #ifdef MPI_CHOLLA
+  if (am_I_root) {
+    if (root_id != procID) {
+      printf("ERROR: Root ID doesn't match procID\n");
       exit(-1);
     }
 
-    for ( int skewer_id=0; skewer_id<n_skewers; skewer_id++){
-      for ( int los_id=0; los_id<n_los_local; los_id++){
-        HI_density   = skewers_HI_density_local[skewer_id*n_los_local + los_id];
-        HeII_density = skewers_HeII_density_local[skewer_id*n_los_local + los_id];
-        velocity     = skewers_velocity_local[skewer_id*n_los_local + los_id];
-        temperature  = skewers_temperature_local[skewer_id*n_los_local + los_id];
-        skewers_HI_density_root[skewer_id*n_los_total + los_id]   = HI_density;
-        skewers_HeII_density_root[skewer_id*n_los_total + los_id] = HeII_density;
-        skewers_velocity_root[skewer_id*n_los_total + los_id]     = velocity;
-        skewers_temperature_root[skewer_id*n_los_total + los_id]  = temperature;
-        #ifdef OUTPUT_SKEWERS
-        density   = skewers_density_local[skewer_id*n_los_local + los_id];  
-        skewers_density_root[skewer_id*n_los_total + los_id] = density;
-        #endif
+    for (int skewer_id = 0; skewer_id < n_skewers; skewer_id++) {
+      for (int los_id = 0; los_id < n_los_local; los_id++) {
+        HI_density   = skewers_HI_density_local[skewer_id * n_los_local + los_id];
+        HeII_density = skewers_HeII_density_local[skewer_id * n_los_local + los_id];
+        velocity     = skewers_velocity_local[skewer_id * n_los_local + los_id];
+        temperature  = skewers_temperature_local[skewer_id * n_los_local + los_id];
+        skewers_HI_density_root[skewer_id * n_los_total + los_id]   = HI_density;
+        skewers_HeII_density_root[skewer_id * n_los_total + los_id] = HeII_density;
+        skewers_velocity_root[skewer_id * n_los_total + los_id]     = velocity;
+        skewers_temperature_root[skewer_id * n_los_total + los_id]  = temperature;
+      #ifdef OUTPUT_SKEWERS
+        density = skewers_density_local[skewer_id * n_los_local + los_id];
+        skewers_density_root[skewer_id * n_los_total + los_id] = density;
+      #endif
       }
     }
 
     int n_indices = mpi_indices.size();
 
-
-
-    #ifdef PRINT_ANALYSIS_LOG
-    printf( "  N MPI indices: %d \n", n_indices );
-    #endif
+      #ifdef PRINT_ANALYSIS_LOG
+    printf("  N MPI indices: %d \n", n_indices);
+      #endif
 
     int mpi_id;
-    for ( int indx=0; indx<n_indices; indx++ ){
+    for (int indx = 0; indx < n_indices; indx++) {
       mpi_id = mpi_indices[indx];
-      if ( indx == 0 ){
-        if ( mpi_id != procID ){
-          printf( "ERROR: Fist MPI indx doesn't match root indx \n");
+      if (indx == 0) {
+        if (mpi_id != procID) {
+          printf("ERROR: Fist MPI indx doesn't match root indx \n");
           exit(-1);
         }
         continue;
       }
 
-
       #ifdef PRINT_ANALYSIS_LOG
-      printf("  Receiving Skewers From pID: %d\n", mpi_id );
+      printf("  Receiving Skewers From pID: %d\n", mpi_id);
       #endif
 
-      MPI_Recv( skewers_HI_density_local,   n_skewers*n_los_local, MPI_CHREAL, mpi_id, 0, world, &mpi_status  );
-      MPI_Recv( skewers_velocity_local,     n_skewers*n_los_local, MPI_CHREAL, mpi_id, 1, world, &mpi_status  );
-      MPI_Recv( skewers_temperature_local,  n_skewers*n_los_local, MPI_CHREAL, mpi_id, 2, world, &mpi_status  );
-      MPI_Recv( skewers_HeII_density_local, n_skewers*n_los_local, MPI_CHREAL, mpi_id, 3, world, &mpi_status  );
-      
+      MPI_Recv(skewers_HI_density_local, n_skewers * n_los_local, MPI_CHREAL, mpi_id, 0, world, &mpi_status);
+      MPI_Recv(skewers_velocity_local, n_skewers * n_los_local, MPI_CHREAL, mpi_id, 1, world, &mpi_status);
+      MPI_Recv(skewers_temperature_local, n_skewers * n_los_local, MPI_CHREAL, mpi_id, 2, world, &mpi_status);
+      MPI_Recv(skewers_HeII_density_local, n_skewers * n_los_local, MPI_CHREAL, mpi_id, 3, world, &mpi_status);
+
       #ifdef OUTPUT_SKEWERS
-      MPI_Recv( skewers_density_local,       n_skewers*n_los_local, MPI_CHREAL, mpi_id, 4, world, &mpi_status  );
+      MPI_Recv(skewers_density_local, n_skewers * n_los_local, MPI_CHREAL, mpi_id, 4, world, &mpi_status);
+      #endif
+
+      for (int skewer_id = 0; skewer_id < n_skewers; skewer_id++) {
+        for (int los_id = 0; los_id < n_los_local; los_id++) {
+          skewers_HI_density_root[skewer_id * n_los_total + indx * n_los_local + los_id] =
+              skewers_HI_density_local[skewer_id * n_los_local + los_id];
+          skewers_HeII_density_root[skewer_id * n_los_total + indx * n_los_local + los_id] =
+              skewers_HeII_density_local[skewer_id * n_los_local + los_id];
+          skewers_velocity_root[skewer_id * n_los_total + indx * n_los_local + los_id] =
+              skewers_velocity_local[skewer_id * n_los_local + los_id];
+          skewers_temperature_root[skewer_id * n_los_total + indx * n_los_local + los_id] =
+              skewers_temperature_local[skewer_id * n_los_local + los_id];
+      #ifdef OUTPUT_SKEWERS
+          skewers_density_root[skewer_id * n_los_total + indx * n_los_local + los_id] =
+              skewers_density_local[skewer_id * n_los_local + los_id];
       #endif
-            
-      for ( int skewer_id=0; skewer_id<n_skewers; skewer_id++){
-        for ( int los_id=0; los_id<n_los_local; los_id++){
-          skewers_HI_density_root   [skewer_id*n_los_total + indx*n_los_local + los_id] = skewers_HI_density_local   [skewer_id*n_los_local + los_id];
-          skewers_HeII_density_root [skewer_id*n_los_total + indx*n_los_local + los_id] = skewers_HeII_density_local [skewer_id*n_los_local + los_id];
-          skewers_velocity_root     [skewer_id*n_los_total + indx*n_los_local + los_id] = skewers_velocity_local     [skewer_id*n_los_local + los_id];
-          skewers_temperature_root  [skewer_id*n_los_total + indx*n_los_local + los_id] = skewers_temperature_local  [skewer_id*n_los_local + los_id];
-          #ifdef OUTPUT_SKEWERS
-          skewers_density_root   [skewer_id*n_los_total + indx*n_los_local + los_id] = skewers_density_local   [skewer_id*n_los_local + los_id];
-          #endif
         }
       }
-
     }
   }
 
-  else{
+  else {
+    MPI_Send(skewers_HI_density_local, n_skewers * n_los_local, MPI_CHREAL, root_id, 0, world);
+    MPI_Send(skewers_velocity_local, n_skewers * n_los_local, MPI_CHREAL, root_id, 1, world);
+    MPI_Send(skewers_temperature_local, n_skewers * n_los_local, MPI_CHREAL, root_id, 2, world);
+    MPI_Send(skewers_HeII_density_local, n_skewers * n_los_local, MPI_CHREAL, root_id, 3, world);
+      #ifdef OUTPUT_SKEWERS
+    MPI_Send(skewers_density_local, n_skewers * n_los_local, MPI_CHREAL, root_id, 4, world);
+      #endif
+  }
 
-    MPI_Send( skewers_HI_density_local,   n_skewers*n_los_local, MPI_CHREAL, root_id, 0, world  );
-    MPI_Send( skewers_velocity_local,     n_skewers*n_los_local, MPI_CHREAL, root_id, 1, world  );
-    MPI_Send( skewers_temperature_local,  n_skewers*n_los_local, MPI_CHREAL, root_id, 2, world  );
-    MPI_Send( skewers_HeII_density_local, n_skewers*n_los_local, MPI_CHREAL, root_id, 3, world  );
-    #ifdef OUTPUT_SKEWERS
-    MPI_Send( skewers_density_local,      n_skewers*n_los_local, MPI_CHREAL, root_id, 4, world  );
+  MPI_Barrier(world);
     #endif
-  } 
-  
-  MPI_Barrier( world );
-  #endif  
-    
-  #ifdef PRINT_ANALYSIS_LOG
-  chprintf("  Skewers Data Transferred\n" );
-  #endif
-}
-
 
+    #ifdef PRINT_ANALYSIS_LOG
+  chprintf("  Skewers Data Transferred\n");
+    #endif
+}
 
-void Grid3D::Populate_Lya_Skewers_Local( int axis ){
-
+void Grid3D::Populate_Lya_Skewers_Local(int axis)
+{
   int nx_local, ny_local, nz_local, n_ghost;
   int nx_grid, ny_grid, nz_grid;
   int ni, nj, n_los, stride, n_skewers_local;
@@ -1268,215 +1233,212 @@ void Grid3D::Populate_Lya_Skewers_Local( int axis ){
   Real *velocity_los;
   Real *temperature_los;
   Real *density_los;
-  
+
   nx_local = Analysis.nx_local;
   ny_local = Analysis.ny_local;
   nz_local = Analysis.nz_local;
-  n_ghost = Analysis.n_ghost;
-  nx_grid = nx_local + 2*n_ghost;
-  ny_grid = ny_local + 2*n_ghost;
-  nz_grid = nz_local + 2*n_ghost;
-  stride = Analysis.n_stride;
+  n_ghost  = Analysis.n_ghost;
+  nx_grid  = nx_local + 2 * n_ghost;
+  ny_grid  = ny_local + 2 * n_ghost;
+  nz_grid  = nz_local + 2 * n_ghost;
+  stride   = Analysis.n_stride;
 
   // X axis
-  if ( axis == 0 ){
-    n_los = nx_local;
-    ni    = ny_local;
-    nj    = nz_local;
-    n_skewers_local = Analysis.n_skewers_local_x;
-    momentum_los = C.momentum_x;
-    HI_density_los    = Analysis.skewers_HI_density_local_x;
-    HeII_density_los  = Analysis.skewers_HeII_density_local_x;
-    velocity_los      = Analysis.skewers_velocity_local_x;
-    temperature_los   = Analysis.skewers_temperature_local_x;
+  if (axis == 0) {
+    n_los            = nx_local;
+    ni               = ny_local;
+    nj               = nz_local;
+    n_skewers_local  = Analysis.n_skewers_local_x;
+    momentum_los     = C.momentum_x;
+    HI_density_los   = Analysis.skewers_HI_density_local_x;
+    HeII_density_los = Analysis.skewers_HeII_density_local_x;
+    velocity_los     = Analysis.skewers_velocity_local_x;
+    temperature_los  = Analysis.skewers_temperature_local_x;
     #ifdef OUTPUT_SKEWERS
-    density_los       = Analysis.skewers_density_local_x;
-    #endif 
-  } 
-  
+    density_los = Analysis.skewers_density_local_x;
+    #endif
+  }
+
   // Y axis
-  if ( axis == 1 ){
-    n_los = ny_local;
-    ni    = nx_local;
-    nj    = nz_local;
-    n_skewers_local = Analysis.n_skewers_local_y;
-    momentum_los = C.momentum_y;
-    HI_density_los    = Analysis.skewers_HI_density_local_y;
-    HeII_density_los  = Analysis.skewers_HeII_density_local_y;
-    velocity_los      = Analysis.skewers_velocity_local_y;
-    temperature_los   = Analysis.skewers_temperature_local_y;
+  if (axis == 1) {
+    n_los            = ny_local;
+    ni               = nx_local;
+    nj               = nz_local;
+    n_skewers_local  = Analysis.n_skewers_local_y;
+    momentum_los     = C.momentum_y;
+    HI_density_los   = Analysis.skewers_HI_density_local_y;
+    HeII_density_los = Analysis.skewers_HeII_density_local_y;
+    velocity_los     = Analysis.skewers_velocity_local_y;
+    temperature_los  = Analysis.skewers_temperature_local_y;
     #ifdef OUTPUT_SKEWERS
-    density_los       = Analysis.skewers_density_local_y;
-    #endif 
+    density_los = Analysis.skewers_density_local_y;
+    #endif
   }
 
   // Z axis
-  if ( axis == 2 ){
-    n_los = nz_local;
-    ni    = nx_local;
-    nj    = ny_local;
-    n_skewers_local = Analysis.n_skewers_local_z;
-    momentum_los = C.momentum_z;
-    HI_density_los    = Analysis.skewers_HI_density_local_z;
-    HeII_density_los  = Analysis.skewers_HeII_density_local_z;
-    velocity_los      = Analysis.skewers_velocity_local_z;
-    temperature_los   = Analysis.skewers_temperature_local_z;
+  if (axis == 2) {
+    n_los            = nz_local;
+    ni               = nx_local;
+    nj               = ny_local;
+    n_skewers_local  = Analysis.n_skewers_local_z;
+    momentum_los     = C.momentum_z;
+    HI_density_los   = Analysis.skewers_HI_density_local_z;
+    HeII_density_los = Analysis.skewers_HeII_density_local_z;
+    velocity_los     = Analysis.skewers_velocity_local_z;
+    temperature_los  = Analysis.skewers_temperature_local_z;
     #ifdef OUTPUT_SKEWERS
-    density_los       = Analysis.skewers_density_local_z;
-    #endif 
-  } 
-    
-  int n_iter_i, n_iter_j, id_grid;   
+    density_los = Analysis.skewers_density_local_z;
+    #endif
+  }
+
+  int n_iter_i, n_iter_j, id_grid;
   n_iter_i = ni / stride;
   n_iter_j = nj / stride;
   int id_i, id_j, skewer_id;
   Real density, HI_density, HeII_density, velocity, temperature;
   skewer_id = 0;
-  for ( int i=0; i<n_iter_i; i++ ){
-    for ( int j=0; j<n_iter_j; j++ ){
-      for ( int id_los=0; id_los<n_los; id_los++ ){
+  for (int i = 0; i < n_iter_i; i++) {
+    for (int j = 0; j < n_iter_j; j++) {
+      for (int id_los = 0; id_los < n_los; id_los++) {
         id_i = i * stride;
         id_j = j * stride;
-        if ( axis == 0 ) id_grid = ( id_los + n_ghost ) + ( id_i + n_ghost )*nx_grid   + ( id_j + n_ghost )*nx_grid*nz_grid;
-        if ( axis == 1 ) id_grid = ( id_i + n_ghost )   + ( id_los + n_ghost )*nx_grid + ( id_j + n_ghost )*nx_grid*nz_grid;
-        if ( axis == 2 ) id_grid = ( id_i + n_ghost )   + ( id_j + n_ghost )*nx_grid   + ( id_los + n_ghost )*nx_grid*nz_grid;
-        density = C.density[id_grid] * Cosmo.rho_0_gas;
+        if (axis == 0) id_grid = (id_los + n_ghost) + (id_i + n_ghost) * nx_grid + (id_j + n_ghost) * nx_grid * nz_grid;
+        if (axis == 1) id_grid = (id_i + n_ghost) + (id_los + n_ghost) * nx_grid + (id_j + n_ghost) * nx_grid * nz_grid;
+        if (axis == 2) id_grid = (id_i + n_ghost) + (id_j + n_ghost) * nx_grid + (id_los + n_ghost) * nx_grid * nz_grid;
+        density  = C.density[id_grid] * Cosmo.rho_0_gas;
         velocity = momentum_los[id_grid] * Cosmo.rho_0_gas * Cosmo.v_0_gas / Cosmo.current_a / density;
-        #ifdef COOLING_GRACKLE
-        HI_density    = Cool.fields.HI_density[id_grid]   * Cosmo.rho_0_gas;
-        HeII_density  = Cool.fields.HeII_density[id_grid] * Cosmo.rho_0_gas;
-        temperature   = Cool.temperature[id_grid];
-        #elif defined CHEMISTRY_GPU
-        HI_density    = C.HI_density[id_grid]   * Cosmo.rho_0_gas;
-        HeII_density  = C.HeII_density[id_grid] * Cosmo.rho_0_gas;
-        temperature   = Chem.Fields.temperature_h[id_grid];
-        #else 
-        chprintf( "ERROR: Lya Statistics only supported for Grackle Cooling or CHEMISTRY_GPU\n");
+    #ifdef COOLING_GRACKLE
+        HI_density   = Cool.fields.HI_density[id_grid] * Cosmo.rho_0_gas;
+        HeII_density = Cool.fields.HeII_density[id_grid] * Cosmo.rho_0_gas;
+        temperature  = Cool.temperature[id_grid];
+    #elif defined CHEMISTRY_GPU
+        HI_density   = C.HI_density[id_grid] * Cosmo.rho_0_gas;
+        HeII_density = C.HeII_density[id_grid] * Cosmo.rho_0_gas;
+        temperature  = Chem.Fields.temperature_h[id_grid];
+    #else
+        chprintf(
+            "ERROR: Lya Statistics only supported for Grackle Cooling or "
+            "CHEMISTRY_GPU\n");
         exit(-1);
-        #endif
-        HI_density_los[skewer_id*n_los + id_los]   = HI_density;
-        HeII_density_los[skewer_id*n_los + id_los] = HeII_density;
-        velocity_los[skewer_id*n_los + id_los]     = velocity;
-        temperature_los[skewer_id*n_los + id_los]  = temperature;
-        #ifdef OUTPUT_SKEWERS
-        density_los[skewer_id*n_los + id_los]   = density;
-        #endif
+    #endif
+        HI_density_los[skewer_id * n_los + id_los]   = HI_density;
+        HeII_density_los[skewer_id * n_los + id_los] = HeII_density;
+        velocity_los[skewer_id * n_los + id_los]     = velocity;
+        temperature_los[skewer_id * n_los + id_los]  = temperature;
+    #ifdef OUTPUT_SKEWERS
+        density_los[skewer_id * n_los + id_los] = density;
+    #endif
       }
       skewer_id += 1;
     }
   }
 
-  if ( skewer_id != n_skewers_local ){
-    printf( "ERROR: Skewers numbers don't match.  ID: %d   N_skewers: %d \n ", skewer_id, n_skewers_local );
+  if (skewer_id != n_skewers_local) {
+    printf("ERROR: Skewers numbers don't match.  ID: %d   N_skewers: %d \n ", skewer_id, n_skewers_local);
     exit(-1);
   }
-
-
 }
 
-
-void Analysis_Module::Initialize_Lya_Statistics( struct parameters *P ){
-
-
+void AnalysisModule::Initialize_Lya_Statistics(struct Parameters *P)
+{
   chprintf(" Initializing Lya Statistics...\n");
 
-
-  n_ghost_skewer = max( nx_total, ny_total );
-  n_ghost_skewer = max( nz_total, n_ghost_skewer );
+  n_ghost_skewer = max(nx_total, ny_total);
+  n_ghost_skewer = max(nz_total, n_ghost_skewer);
   n_ghost_skewer = 0.1 * n_ghost_skewer;
-  n_los_full_x = nx_total + 2 * n_ghost_skewer;
-  n_los_full_y = ny_total + 2 * n_ghost_skewer;
-  n_los_full_z = nz_total + 2 * n_ghost_skewer;
-
-  n_fft_x = nx_total/2 + 1;
-  n_fft_y = ny_total/2 + 1;
-  n_fft_z = nz_total/2 + 1;
-
+  n_los_full_x   = nx_total + 2 * n_ghost_skewer;
+  n_los_full_y   = ny_total + 2 * n_ghost_skewer;
+  n_los_full_z   = nz_total + 2 * n_ghost_skewer;
 
+  n_fft_x = nx_total / 2 + 1;
+  n_fft_y = ny_total / 2 + 1;
+  n_fft_z = nz_total / 2 + 1;
 
   n_stride = P->lya_skewers_stride;
-  chprintf("  Lya Skewers Stride: %d\n", n_stride );
+  chprintf("  Lya Skewers Stride: %d\n", n_stride);
 
   d_log_k = P->lya_Pk_d_log_k;
-  chprintf("  Power Spectrum d_log_k: %f\n", d_log_k );
+  chprintf("  Power Spectrum d_log_k: %f\n", d_log_k);
 
-  n_skewers_local_x = ( ny_local / n_stride ) * ( nz_local / n_stride );
-  n_skewers_local_y = ( nx_local / n_stride ) * ( nz_local / n_stride );
-  n_skewers_local_z = ( nx_local / n_stride ) * ( ny_local / n_stride );
+  n_skewers_local_x = (ny_local / n_stride) * (nz_local / n_stride);
+  n_skewers_local_y = (nx_local / n_stride) * (nz_local / n_stride);
+  n_skewers_local_z = (nx_local / n_stride) * (ny_local / n_stride);
 
-  #ifdef MPI_CHOLLA
-  n_skewers_total_x = ( ny_total / n_stride ) * ( nz_total / n_stride );
-  n_skewers_total_y = ( nx_total / n_stride ) * ( nz_total / n_stride );
-  n_skewers_total_z = ( nx_total / n_stride ) * ( ny_total / n_stride );
-  #else
+    #ifdef MPI_CHOLLA
+  n_skewers_total_x = (ny_total / n_stride) * (nz_total / n_stride);
+  n_skewers_total_y = (nx_total / n_stride) * (nz_total / n_stride);
+  n_skewers_total_z = (nx_total / n_stride) * (ny_total / n_stride);
+    #else
   n_skewers_total_x = n_skewers_local_x;
   n_skewers_total_y = n_skewers_local_y;
   n_skewers_total_z = n_skewers_local_z;
-  #endif
-
+    #endif
 
   // Alocate Memory For Properties of Local Skewers
-  skewers_HI_density_local_x = (Real *) malloc(n_skewers_local_x*nx_local*sizeof(Real));
-  skewers_HI_density_local_y = (Real *) malloc(n_skewers_local_y*ny_local*sizeof(Real));
-  skewers_HI_density_local_z = (Real *) malloc(n_skewers_local_z*nz_local*sizeof(Real));
-
-  skewers_HeII_density_local_x = (Real *) malloc(n_skewers_local_x*nx_local*sizeof(Real));
-  skewers_HeII_density_local_y = (Real *) malloc(n_skewers_local_y*ny_local*sizeof(Real));
-  skewers_HeII_density_local_z = (Real *) malloc(n_skewers_local_z*nz_local*sizeof(Real));
-
-  skewers_velocity_local_x = (Real *) malloc(n_skewers_local_x*nx_local*sizeof(Real));
-  skewers_velocity_local_y = (Real *) malloc(n_skewers_local_y*ny_local*sizeof(Real));
-  skewers_velocity_local_z = (Real *) malloc(n_skewers_local_z*nz_local*sizeof(Real));
-
-  skewers_temperature_local_x = (Real *) malloc(n_skewers_local_x*nx_local*sizeof(Real));
-  skewers_temperature_local_y = (Real *) malloc(n_skewers_local_y*ny_local*sizeof(Real));
-  skewers_temperature_local_z = (Real *) malloc(n_skewers_local_z*nz_local*sizeof(Real));
-  
-  #ifdef OUTPUT_SKEWERS
-  skewers_density_local_x = (Real *) malloc(n_skewers_local_x*nx_local*sizeof(Real));
-  skewers_density_local_y = (Real *) malloc(n_skewers_local_y*ny_local*sizeof(Real));
-  skewers_density_local_z = (Real *) malloc(n_skewers_local_z*nz_local*sizeof(Real));
-  #endif
-  
-  
-  // for (int i=0; i<nproc; i++ ){
-  //   // if ( procID == i  )  printf( " pID: %d    n_x: %d   n_y:%d   n_z:%d \n", procID, n_skewers_total_x, n_skewers_total_y, n_skewers_total_z );
-  //   if ( procID == i  )  printf( " pID: %d    n_x: %d   n_y:%d   n_z:%d \n", procID, n_skewers_local_x, n_skewers_local_y, n_skewers_local_z );
-  //   MPI_Barrier(world);
-  // }
+  skewers_HI_density_local_x = (Real *)malloc(n_skewers_local_x * nx_local * sizeof(Real));
+  skewers_HI_density_local_y = (Real *)malloc(n_skewers_local_y * ny_local * sizeof(Real));
+  skewers_HI_density_local_z = (Real *)malloc(n_skewers_local_z * nz_local * sizeof(Real));
+
+  skewers_HeII_density_local_x = (Real *)malloc(n_skewers_local_x * nx_local * sizeof(Real));
+  skewers_HeII_density_local_y = (Real *)malloc(n_skewers_local_y * ny_local * sizeof(Real));
+  skewers_HeII_density_local_z = (Real *)malloc(n_skewers_local_z * nz_local * sizeof(Real));
+
+  skewers_velocity_local_x = (Real *)malloc(n_skewers_local_x * nx_local * sizeof(Real));
+  skewers_velocity_local_y = (Real *)malloc(n_skewers_local_y * ny_local * sizeof(Real));
+  skewers_velocity_local_z = (Real *)malloc(n_skewers_local_z * nz_local * sizeof(Real));
 
-  #ifdef MPI_CHOLLA
-  mpi_domain_boundary_x = (Real *) malloc(nproc*sizeof(Real));
-  mpi_domain_boundary_y = (Real *) malloc(nproc*sizeof(Real));
-  mpi_domain_boundary_z = (Real *) malloc(nproc*sizeof(Real));
+  skewers_temperature_local_x = (Real *)malloc(n_skewers_local_x * nx_local * sizeof(Real));
+  skewers_temperature_local_y = (Real *)malloc(n_skewers_local_y * ny_local * sizeof(Real));
+  skewers_temperature_local_z = (Real *)malloc(n_skewers_local_z * nz_local * sizeof(Real));
+
+    #ifdef OUTPUT_SKEWERS
+  skewers_density_local_x = (Real *)malloc(n_skewers_local_x * nx_local * sizeof(Real));
+  skewers_density_local_y = (Real *)malloc(n_skewers_local_y * ny_local * sizeof(Real));
+  skewers_density_local_z = (Real *)malloc(n_skewers_local_z * nz_local * sizeof(Real));
+    #endif
 
+    // for (int i=0; i<nproc; i++ ){
+    //   // if ( procID == i  )  printf( " pID: %d    n_x: %d   n_y:%d   n_z:%d
+    //   \n", procID, n_skewers_total_x, n_skewers_total_y, n_skewers_total_z );
+    //   if ( procID == i  )  printf( " pID: %d    n_x: %d   n_y:%d   n_z:%d
+    //   \n", procID, n_skewers_local_x, n_skewers_local_y, n_skewers_local_z );
+    //   MPI_Barrier(world);
+    // }
 
-  MPI_Allgather(&xMin, 1, MPI_CHREAL, mpi_domain_boundary_x, 1, MPI_CHREAL, world );
-  MPI_Allgather(&yMin, 1, MPI_CHREAL, mpi_domain_boundary_y, 1, MPI_CHREAL, world );
-  MPI_Allgather(&zMin, 1, MPI_CHREAL, mpi_domain_boundary_z, 1, MPI_CHREAL, world );
+    #ifdef MPI_CHOLLA
+  mpi_domain_boundary_x = (Real *)malloc(nproc * sizeof(Real));
+  mpi_domain_boundary_y = (Real *)malloc(nproc * sizeof(Real));
+  mpi_domain_boundary_z = (Real *)malloc(nproc * sizeof(Real));
 
+  MPI_Allgather(&xMin, 1, MPI_CHREAL, mpi_domain_boundary_x, 1, MPI_CHREAL, world);
+  MPI_Allgather(&yMin, 1, MPI_CHREAL, mpi_domain_boundary_y, 1, MPI_CHREAL, world);
+  MPI_Allgather(&zMin, 1, MPI_CHREAL, mpi_domain_boundary_z, 1, MPI_CHREAL, world);
 
   root_id_x = -1;
   root_id_y = -1;
   root_id_z = -1;
 
   // Find root_id
-  for (int i=0; i<nproc; i++ ){
-    if ( mpi_domain_boundary_x[i] == xMin_global && mpi_domain_boundary_y[i] == yMin && mpi_domain_boundary_z[i] == zMin ) root_id_x = i;
-    if ( mpi_domain_boundary_y[i] == yMin_global && mpi_domain_boundary_x[i] == xMin && mpi_domain_boundary_z[i] == zMin ) root_id_y = i;
-    if ( mpi_domain_boundary_z[i] == zMin_global && mpi_domain_boundary_x[i] == xMin && mpi_domain_boundary_y[i] == yMin ) root_id_z = i;
+  for (int i = 0; i < nproc; i++) {
+    if (mpi_domain_boundary_x[i] == xMin_global && mpi_domain_boundary_y[i] == yMin && mpi_domain_boundary_z[i] == zMin)
+      root_id_x = i;
+    if (mpi_domain_boundary_y[i] == yMin_global && mpi_domain_boundary_x[i] == xMin && mpi_domain_boundary_z[i] == zMin)
+      root_id_y = i;
+    if (mpi_domain_boundary_z[i] == zMin_global && mpi_domain_boundary_x[i] == xMin && mpi_domain_boundary_y[i] == yMin)
+      root_id_z = i;
   }
 
   // for (int i=0; i<nproc; i++ ){
-  //   if ( procID == i  )  printf( " pID: %d    root_x: %d   root_y:%d   root_z:%d \n", procID, root_id_x, root_id_y, root_id_z );
+  //   if ( procID == i  )  printf( " pID: %d    root_x: %d   root_y:%d
+  //   root_z:%d \n", procID, root_id_x, root_id_y, root_id_z );
   //   MPI_Barrier(world);
   // }
 
-  //Construct the procIDs for the processors skewers
-  for (int i=0; i<nproc; i++ ){
-    if ( mpi_domain_boundary_y[i] == yMin && mpi_domain_boundary_z[i] == zMin ) mpi_indices_x.push_back(i);
-    if ( mpi_domain_boundary_x[i] == xMin && mpi_domain_boundary_z[i] == zMin ) mpi_indices_y.push_back(i);
-    if ( mpi_domain_boundary_x[i] == xMin && mpi_domain_boundary_y[i] == yMin ) mpi_indices_z.push_back(i);
+  // Construct the procIDs for the processors skewers
+  for (int i = 0; i < nproc; i++) {
+    if (mpi_domain_boundary_y[i] == yMin && mpi_domain_boundary_z[i] == zMin) mpi_indices_x.push_back(i);
+    if (mpi_domain_boundary_x[i] == xMin && mpi_domain_boundary_z[i] == zMin) mpi_indices_y.push_back(i);
+    if (mpi_domain_boundary_x[i] == xMin && mpi_domain_boundary_y[i] == yMin) mpi_indices_z.push_back(i);
   }
 
   int n_mpi_x = mpi_indices_x.size();
@@ -1485,53 +1447,51 @@ void Analysis_Module::Initialize_Lya_Statistics( struct parameters *P ){
 
   int temp_indx;
   bool sorted;
-  if ( n_mpi_x > 0 ){
+  if (n_mpi_x > 0) {
     sorted = true;
-    while ( !sorted ){
+    while (!sorted) {
       sorted = true;
-      for (int i=0; i<n_mpi_x-1; i++ ){
-        if ( mpi_domain_boundary_x[mpi_indices_x[i]] > mpi_domain_boundary_x[mpi_indices_x[i+1]] ){
-          temp_indx = mpi_indices_x[i];
-          mpi_indices_x[i] = mpi_indices_x[i+1];
-          mpi_indices_x[i+1] = temp_indx;
-          sorted = false;
+      for (int i = 0; i < n_mpi_x - 1; i++) {
+        if (mpi_domain_boundary_x[mpi_indices_x[i]] > mpi_domain_boundary_x[mpi_indices_x[i + 1]]) {
+          temp_indx            = mpi_indices_x[i];
+          mpi_indices_x[i]     = mpi_indices_x[i + 1];
+          mpi_indices_x[i + 1] = temp_indx;
+          sorted               = false;
         }
       }
     }
   }
 
-  if ( n_mpi_y > 0 ){
+  if (n_mpi_y > 0) {
     sorted = true;
-    while ( !sorted ){
+    while (!sorted) {
       sorted = true;
-      for (int i=0; i<n_mpi_y-1; i++ ){
-        if ( mpi_domain_boundary_y[mpi_indices_y[i]] > mpi_domain_boundary_y[mpi_indices_y[i+1]] ){
-          temp_indx = mpi_indices_y[i];
-          mpi_indices_y[i] = mpi_indices_y[i+1];
-          mpi_indices_y[i+1] = temp_indx;
-          sorted = false;
+      for (int i = 0; i < n_mpi_y - 1; i++) {
+        if (mpi_domain_boundary_y[mpi_indices_y[i]] > mpi_domain_boundary_y[mpi_indices_y[i + 1]]) {
+          temp_indx            = mpi_indices_y[i];
+          mpi_indices_y[i]     = mpi_indices_y[i + 1];
+          mpi_indices_y[i + 1] = temp_indx;
+          sorted               = false;
         }
       }
     }
   }
 
-  if ( n_mpi_z > 0 ){
+  if (n_mpi_z > 0) {
     sorted = true;
-    while ( !sorted ){
+    while (!sorted) {
       sorted = true;
-      for (int i=0; i<n_mpi_z-1; i++ ){
-        if ( mpi_domain_boundary_z[mpi_indices_z[i]] > mpi_domain_boundary_z[mpi_indices_z[i+1]] ){
-          temp_indx = mpi_indices_z[i];
-          mpi_indices_z[i] = mpi_indices_z[i+1];
-          mpi_indices_z[i+1] = temp_indx;
-          sorted = false;
+      for (int i = 0; i < n_mpi_z - 1; i++) {
+        if (mpi_domain_boundary_z[mpi_indices_z[i]] > mpi_domain_boundary_z[mpi_indices_z[i + 1]]) {
+          temp_indx            = mpi_indices_z[i];
+          mpi_indices_z[i]     = mpi_indices_z[i + 1];
+          mpi_indices_z[i + 1] = temp_indx;
+          sorted               = false;
         }
       }
     }
   }
 
-
-
   // for (int i=0; i<nproc; i++ ){
   //   if ( procID == i  ){
   //     printf( " pID: %d  \n", procID );
@@ -1550,229 +1510,228 @@ void Analysis_Module::Initialize_Lya_Statistics( struct parameters *P ){
   // }
   //
   //
-  if ( mpi_indices_x[0] != root_id_x ){
-    printf(" ERROR: Root id doesn't match mpi_indx list for x axis\n" );
+  if (mpi_indices_x[0] != root_id_x) {
+    printf(" ERROR: Root id doesn't match mpi_indx list for x axis\n");
     exit(-1);
   }
 
-  if ( mpi_indices_y[0] != root_id_y ){
-    printf(" ERROR: Root id doesn't match mpi_indx list for y axis\n" );
+  if (mpi_indices_y[0] != root_id_y) {
+    printf(" ERROR: Root id doesn't match mpi_indx list for y axis\n");
     exit(-1);
   }
 
-  if ( mpi_indices_z[0] != root_id_z ){
-    printf(" ERROR: Root id doesn't match mpi_indx list for z axis\n" );
+  if (mpi_indices_z[0] != root_id_z) {
+    printf(" ERROR: Root id doesn't match mpi_indx list for z axis\n");
     exit(-1);
   }
 
+  if (procID == root_id_x)
+    am_I_root_x = true;
+  else
+    am_I_root_x = false;
 
-  if ( procID == root_id_x ) am_I_root_x = true;
-  else am_I_root_x = false;
-
+  if (procID == root_id_y)
+    am_I_root_y = true;
+  else
+    am_I_root_y = false;
 
-  if ( procID == root_id_y ) am_I_root_y = true;
-  else am_I_root_y = false;
+  if (procID == root_id_z)
+    am_I_root_z = true;
+  else
+    am_I_root_z = false;
 
+  if (procID == 0) {
+    root_procs_x = (bool *)malloc(nproc * sizeof(bool));
+    root_procs_y = (bool *)malloc(nproc * sizeof(bool));
+    root_procs_z = (bool *)malloc(nproc * sizeof(bool));
+  }
 
+  // Gather the root processes
+  MPI_Gather(&am_I_root_x, 1, MPI_C_BOOL, root_procs_x, 1, MPI_C_BOOL, 0, world);
+  MPI_Gather(&am_I_root_y, 1, MPI_C_BOOL, root_procs_y, 1, MPI_C_BOOL, 0, world);
+  MPI_Gather(&am_I_root_z, 1, MPI_C_BOOL, root_procs_z, 1, MPI_C_BOOL, 0, world);
 
-  if ( procID == root_id_z ) am_I_root_z = true;
-  else am_I_root_z = false;
-  
-  if ( procID == 0 ){
-    root_procs_x = (bool *) malloc(nproc*sizeof(bool));
-    root_procs_y = (bool *) malloc(nproc*sizeof(bool));
-    root_procs_z = (bool *) malloc(nproc*sizeof(bool));
-  }
-  
-  // Gather the root processes 
-  MPI_Gather( &am_I_root_x, 1, MPI_C_BOOL, root_procs_x, 1, MPI_C_BOOL, 0, world );
-  MPI_Gather( &am_I_root_y, 1, MPI_C_BOOL, root_procs_y, 1, MPI_C_BOOL, 0, world );
-  MPI_Gather( &am_I_root_z, 1, MPI_C_BOOL, root_procs_z, 1, MPI_C_BOOL, 0, world );
-  
   int n_skewers_global_x, n_skewers_global_y, n_skewers_global_z;
-  if ( procID == 0 ){
+  if (procID == 0) {
     n_skewers_global_x = 0;
     n_skewers_global_y = 0;
     n_skewers_global_z = 0;
-    for ( int p_id=0; p_id<nproc; p_id++ ){
+    for (int p_id = 0; p_id < nproc; p_id++) {
       n_skewers_global_x += n_skewers_local_x * root_procs_x[p_id];
       n_skewers_global_y += n_skewers_local_y * root_procs_y[p_id];
-      n_skewers_global_z += n_skewers_local_z * root_procs_z[p_id]; 
+      n_skewers_global_z += n_skewers_local_z * root_procs_z[p_id];
     }
-  }  
-  chprintf( "  N Skewers Global:  x:%d  y:%d  z:%d \n" , n_skewers_global_x,  n_skewers_global_y,  n_skewers_global_z );
-  
-  // Allocate Memory for Global Skewers Data
-  #ifdef OUTPUT_SKEWERS
-  if ( procID == 0 ){
-    skewers_transmitted_flux_HI_x_global = (Real *) malloc(n_skewers_global_x*nx_total*sizeof(Real));
-    skewers_transmitted_flux_HI_y_global = (Real *) malloc(n_skewers_global_y*ny_total*sizeof(Real));
-    skewers_transmitted_flux_HI_z_global = (Real *) malloc(n_skewers_global_z*nz_total*sizeof(Real));
-    
-    skewers_transmitted_flux_HeII_x_global = (Real *) malloc(n_skewers_global_x*nx_total*sizeof(Real));
-    skewers_transmitted_flux_HeII_y_global = (Real *) malloc(n_skewers_global_y*ny_total*sizeof(Real));
-    skewers_transmitted_flux_HeII_z_global = (Real *) malloc(n_skewers_global_z*nz_total*sizeof(Real));
-    
-    skewers_density_x_global = (Real *) malloc(n_skewers_global_x*nx_total*sizeof(Real));
-    skewers_density_y_global = (Real *) malloc(n_skewers_global_y*ny_total*sizeof(Real));
-    skewers_density_z_global = (Real *) malloc(n_skewers_global_z*nz_total*sizeof(Real));
-        
-    skewers_HI_density_x_global = (Real *) malloc(n_skewers_global_x*nx_total*sizeof(Real));
-    skewers_HI_density_y_global = (Real *) malloc(n_skewers_global_y*ny_total*sizeof(Real));
-    skewers_HI_density_z_global = (Real *) malloc(n_skewers_global_z*nz_total*sizeof(Real));
-    
-    skewers_HeII_density_x_global = (Real *) malloc(n_skewers_global_x*nx_total*sizeof(Real));
-    skewers_HeII_density_y_global = (Real *) malloc(n_skewers_global_y*ny_total*sizeof(Real));
-    skewers_HeII_density_z_global = (Real *) malloc(n_skewers_global_z*nz_total*sizeof(Real));
-    
-    skewers_temperature_x_global = (Real *) malloc(n_skewers_global_x*nx_total*sizeof(Real));
-    skewers_temperature_y_global = (Real *) malloc(n_skewers_global_y*ny_total*sizeof(Real));
-    skewers_temperature_z_global = (Real *) malloc(n_skewers_global_z*nz_total*sizeof(Real));
-    
-    skewers_los_velocity_x_global = (Real *) malloc(n_skewers_global_x*nx_total*sizeof(Real));
-    skewers_los_velocity_y_global = (Real *) malloc(n_skewers_global_y*ny_total*sizeof(Real));
-    skewers_los_velocity_z_global = (Real *) malloc(n_skewers_global_z*nz_total*sizeof(Real));
-    
-    transfer_buffer_root_x = (Real *) malloc(n_skewers_local_x*nx_total*sizeof(Real));
-    transfer_buffer_root_y = (Real *) malloc(n_skewers_local_y*ny_total*sizeof(Real));
-    transfer_buffer_root_z = (Real *) malloc(n_skewers_local_z*nz_total*sizeof(Real));
-  }
-  #endif
-  
-  #ifdef PRINT_ANALYSIS_LOG
-  chprintf( " Root Ids X:  \n");
+  }
+  chprintf("  N Skewers Global:  x:%d  y:%d  z:%d \n", n_skewers_global_x, n_skewers_global_y, n_skewers_global_z);
+
+      // Allocate Memory for Global Skewers Data
+      #ifdef OUTPUT_SKEWERS
+  if (procID == 0) {
+    skewers_transmitted_flux_HI_x_global = (Real *)malloc(n_skewers_global_x * nx_total * sizeof(Real));
+    skewers_transmitted_flux_HI_y_global = (Real *)malloc(n_skewers_global_y * ny_total * sizeof(Real));
+    skewers_transmitted_flux_HI_z_global = (Real *)malloc(n_skewers_global_z * nz_total * sizeof(Real));
+
+    skewers_transmitted_flux_HeII_x_global = (Real *)malloc(n_skewers_global_x * nx_total * sizeof(Real));
+    skewers_transmitted_flux_HeII_y_global = (Real *)malloc(n_skewers_global_y * ny_total * sizeof(Real));
+    skewers_transmitted_flux_HeII_z_global = (Real *)malloc(n_skewers_global_z * nz_total * sizeof(Real));
+
+    skewers_density_x_global = (Real *)malloc(n_skewers_global_x * nx_total * sizeof(Real));
+    skewers_density_y_global = (Real *)malloc(n_skewers_global_y * ny_total * sizeof(Real));
+    skewers_density_z_global = (Real *)malloc(n_skewers_global_z * nz_total * sizeof(Real));
+
+    skewers_HI_density_x_global = (Real *)malloc(n_skewers_global_x * nx_total * sizeof(Real));
+    skewers_HI_density_y_global = (Real *)malloc(n_skewers_global_y * ny_total * sizeof(Real));
+    skewers_HI_density_z_global = (Real *)malloc(n_skewers_global_z * nz_total * sizeof(Real));
+
+    skewers_HeII_density_x_global = (Real *)malloc(n_skewers_global_x * nx_total * sizeof(Real));
+    skewers_HeII_density_y_global = (Real *)malloc(n_skewers_global_y * ny_total * sizeof(Real));
+    skewers_HeII_density_z_global = (Real *)malloc(n_skewers_global_z * nz_total * sizeof(Real));
+
+    skewers_temperature_x_global = (Real *)malloc(n_skewers_global_x * nx_total * sizeof(Real));
+    skewers_temperature_y_global = (Real *)malloc(n_skewers_global_y * ny_total * sizeof(Real));
+    skewers_temperature_z_global = (Real *)malloc(n_skewers_global_z * nz_total * sizeof(Real));
+
+    skewers_los_velocity_x_global = (Real *)malloc(n_skewers_global_x * nx_total * sizeof(Real));
+    skewers_los_velocity_y_global = (Real *)malloc(n_skewers_global_y * ny_total * sizeof(Real));
+    skewers_los_velocity_z_global = (Real *)malloc(n_skewers_global_z * nz_total * sizeof(Real));
+
+    transfer_buffer_root_x = (Real *)malloc(n_skewers_local_x * nx_total * sizeof(Real));
+    transfer_buffer_root_y = (Real *)malloc(n_skewers_local_y * ny_total * sizeof(Real));
+    transfer_buffer_root_z = (Real *)malloc(n_skewers_local_z * nz_total * sizeof(Real));
+  }
+      #endif
+
+      #ifdef PRINT_ANALYSIS_LOG
+  chprintf(" Root Ids X:  \n");
   MPI_Barrier(world);
   sleep(1);
-  if ( am_I_root_x  ){
-    printf( "  pID: %d  \n", procID );
+  if (am_I_root_x) {
+    printf("  pID: %d  \n", procID);
   }
   MPI_Barrier(world);
   sleep(1);
 
-  chprintf( " Root Ids Y:  \n");
+  chprintf(" Root Ids Y:  \n");
   MPI_Barrier(world);
   sleep(1);
-  if ( am_I_root_y  ){
-    printf( "  pID: %d  \n", procID );
+  if (am_I_root_y) {
+    printf("  pID: %d  \n", procID);
   }
   MPI_Barrier(world);
   sleep(1);
 
-
-  chprintf( " Root Ids Z:  \n");
+  chprintf(" Root Ids Z:  \n");
   MPI_Barrier(world);
   sleep(1);
-  if ( am_I_root_z  ){
-    printf( "  pID: %d  \n", procID );
+  if (am_I_root_z) {
+    printf("  pID: %d  \n", procID);
   }
   MPI_Barrier(world);
   sleep(1);
-  
-  
-  if ( procID == 0 ){
-    printf( "Root procs x: " );
-    for ( int p_id=0; p_id<nproc; p_id++ ){
-      printf( " %d ", root_procs_x[p_id] );
+
+  if (procID == 0) {
+    printf("Root procs x: ");
+    for (int p_id = 0; p_id < nproc; p_id++) {
+      printf(" %d ", root_procs_x[p_id]);
     }
-    printf( "\n" );
-    
-    printf( "Root procs y: " );
-    for ( int p_id=0; p_id<nproc; p_id++ ){
-      printf( " %d ", root_procs_y[p_id] );
+    printf("\n");
+
+    printf("Root procs y: ");
+    for (int p_id = 0; p_id < nproc; p_id++) {
+      printf(" %d ", root_procs_y[p_id]);
     }
-    printf( "\n" );
-    
-    printf( "Root procs z: " );
-    for ( int p_id=0; p_id<nproc; p_id++ ){
-      printf( " %d ", root_procs_z[p_id] );
+    printf("\n");
+
+    printf("Root procs z: ");
+    for (int p_id = 0; p_id < nproc; p_id++) {
+      printf(" %d ", root_procs_z[p_id]);
     }
-    printf( "\n" );
-  }
-  #endif
-  
-  
-  if ( am_I_root_x ){
-    skewers_HI_density_root_x    = (Real *) malloc(n_skewers_local_x*nx_total*sizeof(Real));
-    skewers_HeII_density_root_x  = (Real *) malloc(n_skewers_local_x*nx_total*sizeof(Real));
-    skewers_velocity_root_x      = (Real *) malloc(n_skewers_local_x*nx_total*sizeof(Real));
-    skewers_temperature_root_x   = (Real *) malloc(n_skewers_local_x*nx_total*sizeof(Real));
-    full_HI_density_x            = (Real *) malloc(n_los_full_x*sizeof(Real));
-    full_HeII_density_x          = (Real *) malloc(n_los_full_x*sizeof(Real));
-    full_velocity_x              = (Real *) malloc(n_los_full_x*sizeof(Real));
-    full_temperature_x           = (Real *) malloc(n_los_full_x*sizeof(Real));
-    full_optical_depth_HI_x      = (Real *) malloc(n_los_full_x*sizeof(Real));
-    full_optical_depth_HeII_x    = (Real *) malloc(n_los_full_x*sizeof(Real));
-    full_vel_Hubble_x            = (Real *) malloc(n_los_full_x*sizeof(Real));
-    skewers_transmitted_flux_HI_x   = (Real *) malloc(n_skewers_local_x*nx_total*sizeof(Real));
-    skewers_transmitted_flux_HeII_x = (Real *) malloc(n_skewers_local_x*nx_total*sizeof(Real));
-    #if OUTPUT_SKEWERS
-    skewers_density_root_x    = (Real *) malloc(n_skewers_local_x*nx_total*sizeof(Real));
-    #endif
-    
+    printf("\n");
+  }
+      #endif
+
+  if (am_I_root_x) {
+    skewers_HI_density_root_x       = (Real *)malloc(n_skewers_local_x * nx_total * sizeof(Real));
+    skewers_HeII_density_root_x     = (Real *)malloc(n_skewers_local_x * nx_total * sizeof(Real));
+    skewers_velocity_root_x         = (Real *)malloc(n_skewers_local_x * nx_total * sizeof(Real));
+    skewers_temperature_root_x      = (Real *)malloc(n_skewers_local_x * nx_total * sizeof(Real));
+    full_HI_density_x               = (Real *)malloc(n_los_full_x * sizeof(Real));
+    full_HeII_density_x             = (Real *)malloc(n_los_full_x * sizeof(Real));
+    full_velocity_x                 = (Real *)malloc(n_los_full_x * sizeof(Real));
+    full_temperature_x              = (Real *)malloc(n_los_full_x * sizeof(Real));
+    full_optical_depth_HI_x         = (Real *)malloc(n_los_full_x * sizeof(Real));
+    full_optical_depth_HeII_x       = (Real *)malloc(n_los_full_x * sizeof(Real));
+    full_vel_Hubble_x               = (Real *)malloc(n_los_full_x * sizeof(Real));
+    skewers_transmitted_flux_HI_x   = (Real *)malloc(n_skewers_local_x * nx_total * sizeof(Real));
+    skewers_transmitted_flux_HeII_x = (Real *)malloc(n_skewers_local_x * nx_total * sizeof(Real));
+      #if OUTPUT_SKEWERS
+    skewers_density_root_x = (Real *)malloc(n_skewers_local_x * nx_total * sizeof(Real));
+      #endif
+
     // Alocate Memory For Power Spectrum Calculation
-    delta_F_x             = (Real *) malloc(nx_total*sizeof(Real));
-    vel_Hubble_x          = (Real *) malloc(nx_total*sizeof(Real));
-    fft_delta_F_x         = (fftw_complex*) fftw_malloc(n_fft_x*sizeof(fftw_complex));
-    fft2_delta_F_x        = (Real *) malloc(n_fft_x*sizeof(Real));
-    fftw_plan_x           = fftw_plan_dft_r2c_1d( nx_total, delta_F_x, fft_delta_F_x, FFTW_ESTIMATE);
-  }
-  k_vals_x              = (Real *) malloc(n_fft_x*sizeof(Real));
-
-  if ( am_I_root_y ){
-    skewers_HI_density_root_y    = (Real *) malloc(n_skewers_local_y*ny_total*sizeof(Real));
-    skewers_HeII_density_root_y  = (Real *) malloc(n_skewers_local_y*ny_total*sizeof(Real));
-    skewers_velocity_root_y      = (Real *) malloc(n_skewers_local_y*ny_total*sizeof(Real));
-    skewers_temperature_root_y   = (Real *) malloc(n_skewers_local_y*ny_total*sizeof(Real));
-    full_HI_density_y            = (Real *) malloc(n_los_full_y*sizeof(Real));
-    full_HeII_density_y          = (Real *) malloc(n_los_full_y*sizeof(Real));
-    full_velocity_y              = (Real *) malloc(n_los_full_y*sizeof(Real));
-    full_temperature_y           = (Real *) malloc(n_los_full_y*sizeof(Real));
-    full_optical_depth_HI_y      = (Real *) malloc(n_los_full_y*sizeof(Real));
-    full_optical_depth_HeII_y    = (Real *) malloc(n_los_full_y*sizeof(Real));
-    full_vel_Hubble_y            = (Real *) malloc(n_los_full_y*sizeof(Real));
-    skewers_transmitted_flux_HI_y   = (Real *) malloc(n_skewers_local_y*ny_total*sizeof(Real));
-    skewers_transmitted_flux_HeII_y = (Real *) malloc(n_skewers_local_y*ny_total*sizeof(Real));
-    #if OUTPUT_SKEWERS
-    skewers_density_root_y    = (Real *) malloc(n_skewers_local_y*ny_total*sizeof(Real));
-    #endif
-    
+    delta_F_x      = (Real *)malloc(nx_total * sizeof(Real));
+    vel_Hubble_x   = (Real *)malloc(nx_total * sizeof(Real));
+    fft_delta_F_x  = (fftw_complex *)fftw_malloc(n_fft_x * sizeof(fftw_complex));
+    fft2_delta_F_x = (Real *)malloc(n_fft_x * sizeof(Real));
+    fftw_plan_x    = fftw_plan_dft_r2c_1d(nx_total, delta_F_x, fft_delta_F_x, FFTW_ESTIMATE);
+  }
+  k_vals_x = (Real *)malloc(n_fft_x * sizeof(Real));
+
+  if (am_I_root_y) {
+    skewers_HI_density_root_y       = (Real *)malloc(n_skewers_local_y * ny_total * sizeof(Real));
+    skewers_HeII_density_root_y     = (Real *)malloc(n_skewers_local_y * ny_total * sizeof(Real));
+    skewers_velocity_root_y         = (Real *)malloc(n_skewers_local_y * ny_total * sizeof(Real));
+    skewers_temperature_root_y      = (Real *)malloc(n_skewers_local_y * ny_total * sizeof(Real));
+    full_HI_density_y               = (Real *)malloc(n_los_full_y * sizeof(Real));
+    full_HeII_density_y             = (Real *)malloc(n_los_full_y * sizeof(Real));
+    full_velocity_y                 = (Real *)malloc(n_los_full_y * sizeof(Real));
+    full_temperature_y              = (Real *)malloc(n_los_full_y * sizeof(Real));
+    full_optical_depth_HI_y         = (Real *)malloc(n_los_full_y * sizeof(Real));
+    full_optical_depth_HeII_y       = (Real *)malloc(n_los_full_y * sizeof(Real));
+    full_vel_Hubble_y               = (Real *)malloc(n_los_full_y * sizeof(Real));
+    skewers_transmitted_flux_HI_y   = (Real *)malloc(n_skewers_local_y * ny_total * sizeof(Real));
+    skewers_transmitted_flux_HeII_y = (Real *)malloc(n_skewers_local_y * ny_total * sizeof(Real));
+      #if OUTPUT_SKEWERS
+    skewers_density_root_y = (Real *)malloc(n_skewers_local_y * ny_total * sizeof(Real));
+      #endif
+
     // Alocate Memory For Power Spectrum Calculation
-    delta_F_y             = (Real *) malloc(ny_total*sizeof(Real));
-    vel_Hubble_y          = (Real *) malloc(ny_total*sizeof(Real));
-    fft_delta_F_y         = (fftw_complex*) fftw_malloc(n_fft_y*sizeof(fftw_complex));
-    fft2_delta_F_y        = (Real *) malloc(n_fft_y*sizeof(Real));
-    fftw_plan_y           = fftw_plan_dft_r2c_1d( ny_total, delta_F_y, fft_delta_F_y, FFTW_ESTIMATE);
-  }
-  k_vals_y              = (Real *) malloc(n_fft_y*sizeof(Real));
-
-  if ( am_I_root_z ){
-    skewers_HI_density_root_z    = (Real *) malloc(n_skewers_local_z*nz_total*sizeof(Real));
-    skewers_HeII_density_root_z  = (Real *) malloc(n_skewers_local_z*nz_total*sizeof(Real));
-    skewers_velocity_root_z      = (Real *) malloc(n_skewers_local_z*nz_total*sizeof(Real));
-    skewers_temperature_root_z   = (Real *) malloc(n_skewers_local_z*nz_total*sizeof(Real));
-    full_HI_density_z            = (Real *) malloc(n_los_full_z*sizeof(Real));
-    full_HeII_density_z          = (Real *) malloc(n_los_full_z*sizeof(Real));
-    full_velocity_z              = (Real *) malloc(n_los_full_z*sizeof(Real));
-    full_temperature_z           = (Real *) malloc(n_los_full_z*sizeof(Real));
-    full_optical_depth_HI_z      = (Real *) malloc(n_los_full_z*sizeof(Real));
-    full_optical_depth_HeII_z    = (Real *) malloc(n_los_full_z*sizeof(Real));
-    full_vel_Hubble_z            = (Real *) malloc(n_los_full_z*sizeof(Real));
-    skewers_transmitted_flux_HI_z   = (Real *) malloc(n_skewers_local_z*nz_total*sizeof(Real));
-    skewers_transmitted_flux_HeII_z = (Real *) malloc(n_skewers_local_z*nz_total*sizeof(Real));
-    #if OUTPUT_SKEWERS
-    skewers_density_root_z    = (Real *) malloc(n_skewers_local_z*nz_total*sizeof(Real));
-    #endif
-    
+    delta_F_y      = (Real *)malloc(ny_total * sizeof(Real));
+    vel_Hubble_y   = (Real *)malloc(ny_total * sizeof(Real));
+    fft_delta_F_y  = (fftw_complex *)fftw_malloc(n_fft_y * sizeof(fftw_complex));
+    fft2_delta_F_y = (Real *)malloc(n_fft_y * sizeof(Real));
+    fftw_plan_y    = fftw_plan_dft_r2c_1d(ny_total, delta_F_y, fft_delta_F_y, FFTW_ESTIMATE);
+  }
+  k_vals_y = (Real *)malloc(n_fft_y * sizeof(Real));
+
+  if (am_I_root_z) {
+    skewers_HI_density_root_z       = (Real *)malloc(n_skewers_local_z * nz_total * sizeof(Real));
+    skewers_HeII_density_root_z     = (Real *)malloc(n_skewers_local_z * nz_total * sizeof(Real));
+    skewers_velocity_root_z         = (Real *)malloc(n_skewers_local_z * nz_total * sizeof(Real));
+    skewers_temperature_root_z      = (Real *)malloc(n_skewers_local_z * nz_total * sizeof(Real));
+    full_HI_density_z               = (Real *)malloc(n_los_full_z * sizeof(Real));
+    full_HeII_density_z             = (Real *)malloc(n_los_full_z * sizeof(Real));
+    full_velocity_z                 = (Real *)malloc(n_los_full_z * sizeof(Real));
+    full_temperature_z              = (Real *)malloc(n_los_full_z * sizeof(Real));
+    full_optical_depth_HI_z         = (Real *)malloc(n_los_full_z * sizeof(Real));
+    full_optical_depth_HeII_z       = (Real *)malloc(n_los_full_z * sizeof(Real));
+    full_vel_Hubble_z               = (Real *)malloc(n_los_full_z * sizeof(Real));
+    skewers_transmitted_flux_HI_z   = (Real *)malloc(n_skewers_local_z * nz_total * sizeof(Real));
+    skewers_transmitted_flux_HeII_z = (Real *)malloc(n_skewers_local_z * nz_total * sizeof(Real));
+      #if OUTPUT_SKEWERS
+    skewers_density_root_z = (Real *)malloc(n_skewers_local_z * nz_total * sizeof(Real));
+      #endif
+
     // Alocate Memory For Power Spectrum Calculation
-    delta_F_z             = (Real *) malloc(nz_total*sizeof(Real));
-    vel_Hubble_z          = (Real *) malloc(nz_total*sizeof(Real));
-    fft_delta_F_z         = (fftw_complex*) fftw_malloc(n_fft_z*sizeof(fftw_complex));
-    fft2_delta_F_z        = (Real *) malloc(n_fft_z*sizeof(Real));
-    fftw_plan_z           = fftw_plan_dft_r2c_1d( nz_total, delta_F_z, fft_delta_F_z, FFTW_ESTIMATE);
+    delta_F_z      = (Real *)malloc(nz_total * sizeof(Real));
+    vel_Hubble_z   = (Real *)malloc(nz_total * sizeof(Real));
+    fft_delta_F_z  = (fftw_complex *)fftw_malloc(n_fft_z * sizeof(fftw_complex));
+    fft2_delta_F_z = (Real *)malloc(n_fft_z * sizeof(Real));
+    fftw_plan_z    = fftw_plan_dft_r2c_1d(nz_total, delta_F_z, fft_delta_F_z, FFTW_ESTIMATE);
   }
-  k_vals_z              = (Real *) malloc(n_fft_z*sizeof(Real));
+  k_vals_z = (Real *)malloc(n_fft_z * sizeof(Real));
 
-  #else
+    #else
 
   skewers_HI_density_root_x = skewers_HI_density_local_x;
   skewers_HI_density_root_y = skewers_HI_density_local_y;
@@ -1786,40 +1745,31 @@ void Analysis_Module::Initialize_Lya_Statistics( struct parameters *P ){
   skewers_temperature_root_y = skewers_temperature_local_y;
   skewers_temperature_root_z = skewers_temperature_local_z;
 
-  full_HI_density_x  = (Real *) malloc(n_los_full_x*sizeof(Real));
-  full_velocity_x    = (Real *) malloc(n_los_full_x*sizeof(Real));
-  full_temperature_x = (Real *) malloc(n_los_full_x*sizeof(Real));
-  full_optical_depth_x = (Real *) malloc(n_los_full_x*sizeof(Real));
-  full_vel_Hubble_x  = (Real *) malloc(n_los_full_x*sizeof(Real));
-  transmitted_flux_x = (Real *) malloc(nx_total*sizeof(Real));
-
-  full_HI_density_y  = (Real *) malloc(n_los_full_y*sizeof(Real));
-  full_velocity_y    = (Real *) malloc(n_los_full_y*sizeof(Real));
-  full_temperature_y = (Real *) malloc(n_los_full_y*sizeof(Real));
-  full_optical_depth_y = (Real *) malloc(n_los_full_y*sizeof(Real));
-  full_vel_Hubble_y  = (Real *) malloc(n_los_full_y*sizeof(Real));
-  transmitted_flux_y = (Real *) malloc(ny_total*sizeof(Real));
-
-  full_HI_density_z  = (Real *) malloc(n_los_full_z*sizeof(Real));
-  full_velocity_z    = (Real *) malloc(n_los_full_z*sizeof(Real));
-  full_temperature_z = (Real *) malloc(n_los_full_z*sizeof(Real));
-  full_optical_depth_z = (Real *) malloc(n_los_full_z*sizeof(Real));
-  full_vel_Hubble_z  = (Real *) malloc(n_los_full_z*sizeof(Real));
-  transmitted_flux_z = (Real *) malloc(nz_total*sizeof(Real));
-
-
-
-  #endif
-
-
+  full_HI_density_x    = (Real *)malloc(n_los_full_x * sizeof(Real));
+  full_velocity_x      = (Real *)malloc(n_los_full_x * sizeof(Real));
+  full_temperature_x   = (Real *)malloc(n_los_full_x * sizeof(Real));
+  full_optical_depth_x = (Real *)malloc(n_los_full_x * sizeof(Real));
+  full_vel_Hubble_x    = (Real *)malloc(n_los_full_x * sizeof(Real));
+  transmitted_flux_x   = (Real *)malloc(nx_total * sizeof(Real));
+
+  full_HI_density_y    = (Real *)malloc(n_los_full_y * sizeof(Real));
+  full_velocity_y      = (Real *)malloc(n_los_full_y * sizeof(Real));
+  full_temperature_y   = (Real *)malloc(n_los_full_y * sizeof(Real));
+  full_optical_depth_y = (Real *)malloc(n_los_full_y * sizeof(Real));
+  full_vel_Hubble_y    = (Real *)malloc(n_los_full_y * sizeof(Real));
+  transmitted_flux_y   = (Real *)malloc(ny_total * sizeof(Real));
+
+  full_HI_density_z    = (Real *)malloc(n_los_full_z * sizeof(Real));
+  full_velocity_z      = (Real *)malloc(n_los_full_z * sizeof(Real));
+  full_temperature_z   = (Real *)malloc(n_los_full_z * sizeof(Real));
+  full_optical_depth_z = (Real *)malloc(n_los_full_z * sizeof(Real));
+  full_vel_Hubble_z    = (Real *)malloc(n_los_full_z * sizeof(Real));
+  transmitted_flux_z   = (Real *)malloc(nz_total * sizeof(Real));
 
+    #endif
 
   chprintf(" Lya Statistics Initialized.\n");
-
-
-
 }
 
-
-#endif //LYA_STATISTICS
-#endif //ANALYSIS
+  #endif  // LYA_STATISTICS
+#endif    // ANALYSIS
diff --git a/src/analysis/phase_diagram.cpp b/src/analysis/phase_diagram.cpp
index feb7e232f..a3aa1dc3c 100644
--- a/src/analysis/phase_diagram.cpp
+++ b/src/analysis/phase_diagram.cpp
@@ -1,131 +1,131 @@
 #if defined(ANALYSIS) && defined(PHASE_DIAGRAM)
 
-#include <stdio.h>      /* printf */
-#include <math.h>
-#include "../analysis/analysis.h"
-#include "../io/io.h"
+  #include <math.h>
+  #include <stdio.h> /* printf */
 
-#ifdef MPI_CHOLLA
-#include "../mpi/mpi_routines.h"
-#endif
+  #include "../analysis/analysis.h"
+  #include "../io/io.h"
 
-void Grid3D::Compute_Phase_Diagram(){
+  #ifdef MPI_CHOLLA
+    #include "../mpi/mpi_routines.h"
+  #endif
 
+void Grid3D::Compute_Phase_Diagram()
+{
   int n_temp, n_dens;
   Real temp_min, temp_max, dens_min, dens_max;
   Real log_temp_min, log_temp_max, log_dens_min, log_dens_max;
   Real log_delta_dens, log_delta_temp;
 
-  n_dens = Analysis.n_dens;
-  n_temp = Analysis.n_temp;
+  n_dens   = Analysis.n_dens;
+  n_temp   = Analysis.n_temp;
   dens_min = Analysis.dens_min;
   dens_max = Analysis.dens_max;
   temp_min = Analysis.temp_min;
   temp_max = Analysis.temp_max;
 
-  log_dens_min = log10( dens_min );
-  log_dens_max = log10( dens_max );
-  log_temp_min = log10( temp_min );
-  log_temp_max = log10( temp_max );
-
-  log_delta_dens = ( log_dens_max - log_dens_min ) / n_dens;
-  log_delta_temp = ( log_temp_max - log_temp_min ) / n_temp;
+  log_dens_min = log10(dens_min);
+  log_dens_max = log10(dens_max);
+  log_temp_min = log10(temp_min);
+  log_temp_max = log10(temp_max);
 
+  log_delta_dens = (log_dens_max - log_dens_min) / n_dens;
+  log_delta_temp = (log_temp_max - log_temp_min) / n_temp;
 
   int nx_local, ny_local, nz_local, n_ghost;
   int nx_grid, ny_grid, nz_grid;
   nx_local = Analysis.nx_local;
   ny_local = Analysis.ny_local;
   nz_local = Analysis.nz_local;
-  n_ghost = Analysis.n_ghost;
-  nx_grid = nx_local + 2*n_ghost;
-  ny_grid = ny_local + 2*n_ghost;
-  nz_grid = nz_local + 2*n_ghost;
-
-
+  n_ghost  = Analysis.n_ghost;
+  nx_grid  = nx_local + 2 * n_ghost;
+  ny_grid  = ny_local + 2 * n_ghost;
+  nz_grid  = nz_local + 2 * n_ghost;
 
   Real dens, log_dens, temp, log_temp;
   int k, j, i, id_grid;
   int indx_dens, indx_temp, indx_phase;
 
+  // Clear Phase Dikagram
+  for (indx_phase = 0; indx_phase < n_temp * n_dens; indx_phase++) Analysis.phase_diagram[indx_phase] = 0;
+
+  for (k = 0; k < nz_local; k++) {
+    for (j = 0; j < ny_local; j++) {
+      for (i = 0; i < nx_local; i++) {
+        id_grid = (i + n_ghost) + (j + n_ghost) * nx_grid + (k + n_ghost) * nx_grid * ny_grid;
+        dens    = C.density[id_grid] * Cosmo.rho_0_gas / Cosmo.rho_mean_baryon;  // Baryonic overdensity
+  // chprintf( "%f %f \n", dens, temp);
+  #ifdef COOLING_GRACKLE
+        temp = Cool.temperature[id_grid];
+  #elif defined CHEMISTRY_GPU
+        temp = Chem.Fields.temperature_h[id_grid];
+  #else
+        chprintf(
+            "ERROR: Temperature Field is only supported for Grackle Cooling or "
+            "CHEMISTRY_GPU\n");
+        exit(-1);
+  #endif
 
-  //Clear Phase Dikagram
-  for (indx_phase=0; indx_phase<n_temp*n_dens; indx_phase++) Analysis.phase_diagram[indx_phase] = 0;
-
-  for ( k=0; k<nz_local; k++ ){
-    for ( j=0; j<ny_local; j++ ){
-      for ( i=0; i<nx_local; i++ ){
-          id_grid = (i+n_ghost) + (j+n_ghost)*nx_grid + (k+n_ghost)*nx_grid*ny_grid;
-          dens = C.density[id_grid] * Cosmo.rho_0_gas / Cosmo.rho_mean_baryon; // Baryonic overdensity
-          // chprintf( "%f %f \n", dens, temp);
-          #ifdef COOLING_GRACKLE
-          temp = Cool.temperature[id_grid];
-          #elif defined CHEMISTRY_GPU
-          temp = Chem.Fields.temperature_h[id_grid];
-          #else
-          chprintf( "ERROR: Temperature Field is only supported for Grackle Cooling or CHEMISTRY_GPU\n");
-          exit(-1);        
-          #endif
-    
-          if ( dens < dens_min || dens > dens_max || temp < temp_min || temp > temp_max ){
-            // printf("Outside Phase Diagram:  dens:%e   temp:%e \n", dens, temp );
-            continue;
-          }
-          log_dens = log10(dens);
-          log_temp = log10(temp);
-          indx_dens = ( log_dens - log_dens_min ) / log_delta_dens;
-          indx_temp = ( log_temp - log_temp_min ) / log_delta_temp;
-
-          indx_phase = indx_temp + indx_dens*n_temp;
-          if ( indx_phase >= n_dens*n_temp || indx_phase < 0 ){
-            printf("Index outside Phase Diagram:  indx:%d   N:%d  dens:%e   temp:%e  indx_dens:%d  indx_temp:%d   \n", indx_phase, n_dens*n_temp, dens, temp, indx_dens, indx_temp );
-            continue;
-          }
-          Analysis.phase_diagram[indx_phase] += 1;
-
+        if (dens < dens_min || dens > dens_max || temp < temp_min || temp > temp_max) {
+          // printf("Outside Phase Diagram:  dens:%e   temp:%e \n", dens, temp
+          // );
+          continue;
+        }
+        log_dens  = log10(dens);
+        log_temp  = log10(temp);
+        indx_dens = (log_dens - log_dens_min) / log_delta_dens;
+        indx_temp = (log_temp - log_temp_min) / log_delta_temp;
+
+        indx_phase = indx_temp + indx_dens * n_temp;
+        if (indx_phase >= n_dens * n_temp || indx_phase < 0) {
+          printf(
+              "Index outside Phase Diagram:  indx:%d   N:%d  dens:%e   temp:%e "
+              " indx_dens:%d  indx_temp:%d   \n",
+              indx_phase, n_dens * n_temp, dens, temp, indx_dens, indx_temp);
+          continue;
+        }
+        Analysis.phase_diagram[indx_phase] += 1;
       }
     }
   }
 
   // Real phase_sum_local = 0;
-  // for (indx_phase=0; indx_phase<n_temp*n_dens; indx_phase++) phase_sum_local += Analysis.phase_diagram[indx_phase];
-  // printf(" Phase Diagram Sum Local: %f\n", phase_sum_local );
+  // for (indx_phase=0; indx_phase<n_temp*n_dens; indx_phase++) phase_sum_local
+  // += Analysis.phase_diagram[indx_phase]; printf(" Phase Diagram Sum Local:
+  // %f\n", phase_sum_local );
 
   #ifdef MPI_CHOLLA
-  MPI_Reduce( Analysis.phase_diagram, Analysis.phase_diagram_global, n_temp*n_dens,  MPI_FLOAT,  MPI_SUM, 0,  world );
-  if ( procID == 0) for (indx_phase=0; indx_phase<n_temp*n_dens; indx_phase++) Analysis.phase_diagram[indx_phase] = Analysis.phase_diagram_global[indx_phase];
+  MPI_Reduce(Analysis.phase_diagram, Analysis.phase_diagram_global, n_temp * n_dens, MPI_FLOAT, MPI_SUM, 0, world);
+  if (procID == 0)
+    for (indx_phase = 0; indx_phase < n_temp * n_dens; indx_phase++)
+      Analysis.phase_diagram[indx_phase] = Analysis.phase_diagram_global[indx_phase];
   #endif
 
-  //Compute the sum for normalization
+  // Compute the sum for normalization
   Real phase_sum = 0;
-  for (indx_phase=0; indx_phase<n_temp*n_dens; indx_phase++) phase_sum += Analysis.phase_diagram[indx_phase];
-  chprintf(" Phase Diagram Sum Global: %f\n", phase_sum );
-
-  //Normalize the Phase Diagram
-  for (indx_phase=0; indx_phase<n_temp*n_dens; indx_phase++) Analysis.phase_diagram[indx_phase] /= phase_sum;
-
-
+  for (indx_phase = 0; indx_phase < n_temp * n_dens; indx_phase++) phase_sum += Analysis.phase_diagram[indx_phase];
+  chprintf(" Phase Diagram Sum Global: %f\n", phase_sum);
 
+  // Normalize the Phase Diagram
+  for (indx_phase = 0; indx_phase < n_temp * n_dens; indx_phase++) Analysis.phase_diagram[indx_phase] /= phase_sum;
 }
 
-
-void Analysis_Module::Initialize_Phase_Diagram( struct parameters *P ){
-
-  //Size of the diagram
-  n_dens = 1000;
-  n_temp = 1000;
+void AnalysisModule::Initialize_Phase_Diagram(struct Parameters *P)
+{
+  // Size of the diagram
+  n_dens   = 1000;
+  n_temp   = 1000;
   dens_min = 1e-3;
   dens_max = 1e6;
   temp_min = 1e0;
   temp_max = 1e8;
 
-  phase_diagram = (float *) malloc(n_dens*n_temp*sizeof(float));
+  phase_diagram = (float *)malloc(n_dens * n_temp * sizeof(float));
 
   #ifdef MPI_CHOLLA
-  if (procID == 0) phase_diagram_global = (float *) malloc(n_dens*n_temp*sizeof(float));
+  if (procID == 0) phase_diagram_global = (float *)malloc(n_dens * n_temp * sizeof(float));
   #endif
   chprintf(" Phase Diagram Initialized.\n");
-
 }
 
 #endif
diff --git a/src/chemistry_gpu/chemistry_functions.cpp b/src/chemistry_gpu/chemistry_functions.cpp
index 2564d2591..7999a6d55 100644
--- a/src/chemistry_gpu/chemistry_functions.cpp
+++ b/src/chemistry_gpu/chemistry_functions.cpp
@@ -1,324 +1,318 @@
 #ifdef CHEMISTRY_GPU
 
+  #include "../grid/grid3D.h"
+  #include "../io/io.h"
+  #include "../utils/hydro_utilities.h"
+  #include "../utils/mhd_utilities.h"
+  #include "chemistry_gpu.h"
+  #include "rates.cuh"
+
+  #ifdef DE
+    #include "../hydro/hydro_cuda.h"
+  #endif
 
-#include "chemistry_gpu.h"
-#include "../grid/grid3D.h"
-#include "../io/io.h"
-#include "rates.cuh"
-
-#ifdef DE
-#include"../hydro/hydro_cuda.h"
-#endif
+  #define TINY 1e-20
 
-#define TINY 1e-20
+void Grid3D::Initialize_Chemistry(struct Parameters *P)
+{
+  chprintf("Initializing the GPU Chemistry Solver... \n");
 
-void Grid3D::Initialize_Chemistry( struct parameters *P ){
-  
-  chprintf( "Initializing the GPU Chemistry Solver... \n");
-  
   Chem.nx = H.nx;
   Chem.ny = H.ny;
   Chem.nz = H.nz;
-  
+
   Chem.H.runtime_chemistry_step = 0;
-  
+
   Chem.use_case_B_recombination = false;
-  
+
   // Initialize the Chemistry Header
-  Chem.H.gamma = gama;
+  Chem.H.gamma       = gama;
   Chem.H.N_Temp_bins = 600;
-  Chem.H.Temp_start = 1.0;
-  Chem.H.Temp_end   = 1000000000.0;
-  
+  Chem.H.Temp_start  = 1.0;
+  Chem.H.Temp_end    = 1000000000.0;
+
   Chem.H.H_fraction = INITIAL_FRACTION_HI + INITIAL_FRACTION_HII;
-  
-  Chem.H.H0 = P->H0;
+
+  #ifdef COSMOLOGY
+  Chem.H.H0      = P->H0;
   Chem.H.Omega_M = P->Omega_M;
   Chem.H.Omega_L = P->Omega_L;
-  
-  
+  #endif  // COSMOLOGY
+
   // Set up the units system.
   Real Msun, kpc_cgs, kpc_km, dens_to_CGS;
-  Msun = MSUN_CGS;
-  kpc_cgs = KPC_CGS;
-  kpc_km  = KPC;
-  dens_to_CGS = Cosmo.rho_0_gas * Msun / kpc_cgs / kpc_cgs / kpc_cgs * Cosmo.cosmo_h * Cosmo.cosmo_h;
-  
+  Msun        = MSUN_CGS;
+  kpc_cgs     = KPC_CGS;
+  kpc_km      = KPC;
+  dens_to_CGS = Msun / kpc_cgs / kpc_cgs / kpc_cgs;
+  #ifdef COSMOLOGY
+  dens_to_CGS = dens_to_CGS * Cosmo.rho_0_gas * Cosmo.cosmo_h * Cosmo.cosmo_h;
+  #endif  // COSMOLOGY
+
   // These are conversions from code units to cgs. Following Grackle
-  Chem.H.a_value = Cosmo.current_a;
-  Chem.H.density_units  = dens_to_CGS / Chem.H.a_value / Chem.H.a_value / Chem.H.a_value ;
-  Chem.H.length_units   = kpc_cgs / Cosmo.cosmo_h * Chem.H.a_value;
-  Chem.H.time_units     = kpc_km / Cosmo.cosmo_h ;
-  Chem.H.velocity_units = Chem.H.length_units /Chem.H.time_units; 
-  Chem.H.dens_number_conv = Chem.H.density_units * pow(Chem.H.a_value, 3) / MH;
-  
+  Chem.H.density_units    = dens_to_CGS;
+  Chem.H.length_units     = kpc_cgs;
+  Chem.H.time_units       = kpc_km;
+  Chem.H.dens_number_conv = Chem.H.density_units / MH;
+  #ifdef COSMOLOGY
+  Chem.H.a_value          = Cosmo.current_a;
+  Chem.H.density_units    = Chem.H.density_units / Chem.H.a_value / Chem.H.a_value / Chem.H.a_value;
+  Chem.H.length_units     = Chem.H.length_units / Cosmo.cosmo_h * Chem.H.a_value;
+  Chem.H.time_units       = Chem.H.time_units / Cosmo.cosmo_h;
+  Chem.H.dens_number_conv = Chem.H.density_number_conv * pow(Chem.H.a_value, 3);
+  #endif  // COSMOLOGY
+  Chem.H.velocity_units = Chem.H.length_units / Chem.H.time_units;
+
   Real dens_base, length_base, time_base;
-  dens_base   = Chem.H.density_units * Chem.H.a_value * Chem.H.a_value * Chem.H.a_value;
-  length_base = Chem.H.length_units / Chem.H.a_value;
-  time_base   = Chem.H.time_units;   
-  Chem.H.cooling_units   = ( pow(length_base, 2) * pow(MH, 2) ) / ( dens_base * pow(time_base, 3) );
-  Chem.H.reaction_units = MH / (dens_base * time_base );
+  dens_base   = Chem.H.density_units;
+  length_base = Chem.H.length_units;
+  #ifdef COSMOLOGY
+  dens_base   = dens_base * Chem.H.a_value * Chem.H.a_value * Chem.H.a_value;
+  length_base = length_base / Chem.H.a_value;
+  #endif  // COSMOLOGY
+
+  time_base             = Chem.H.time_units;
+  Chem.H.cooling_units  = (pow(length_base, 2) * pow(MH, 2)) / (dens_base * pow(time_base, 3));
+  Chem.H.reaction_units = MH / (dens_base * time_base);
   // printf(" cooling_units: %e\n", Chem.H.cooling_units );
   // printf(" reaction_units: %e\n", Chem.H.reaction_units );
-  
+
   Chem.H.max_iter = 10000;
-  
+
   // Initialize all the rates
-  Chem.Initialize( P );
-  
+  Chem.Initialize(P);
+
   #ifdef COSMOLOGY
   // Real kpc_cgs = KPC_CGS;
-  Chem.H.density_conversion = Cosmo.rho_0_gas * Cosmo.cosmo_h * Cosmo.cosmo_h / pow( kpc_cgs, 3) * MSUN_CGS ; 
-  Chem.H.energy_conversion  =  Cosmo.v_0_gas * Cosmo.v_0_gas * 1e10;  //km^2 -> cm^2 ;
-  #else // Not COSMOLOGY
+  Chem.H.density_conversion = Cosmo.rho_0_gas * Cosmo.cosmo_h * Cosmo.cosmo_h / pow(kpc_cgs, 3) * MSUN_CGS;
+  Chem.H.energy_conversion  = Cosmo.v_0_gas * Cosmo.v_0_gas * 1e10;  // km^2 -> cm^2 ;
+  #else                                                              // Not COSMOLOGY
   Chem.H.density_conversion = 1.0;
   Chem.H.energy_conversion  = 1.0;
   #endif
-  Chem.H.n_uvb_rates_samples  = Chem.n_uvb_rates_samples;
-  Chem.H.uvb_rates_redshift_d = Chem.rates_z_d;
-  Chem.H.photo_ion_HI_rate_d   = Chem.Ion_rates_HI_d;
-  Chem.H.photo_ion_HeI_rate_d  = Chem.Ion_rates_HeI_d;
-  Chem.H.photo_ion_HeII_rate_d = Chem.Ion_rates_HeII_d;
+  Chem.H.n_uvb_rates_samples    = Chem.n_uvb_rates_samples;
+  Chem.H.uvb_rates_redshift_d   = Chem.rates_z_d;
+  Chem.H.photo_ion_HI_rate_d    = Chem.Ion_rates_HI_d;
+  Chem.H.photo_ion_HeI_rate_d   = Chem.Ion_rates_HeI_d;
+  Chem.H.photo_ion_HeII_rate_d  = Chem.Ion_rates_HeII_d;
   Chem.H.photo_heat_HI_rate_d   = Chem.Heat_rates_HI_d;
   Chem.H.photo_heat_HeI_rate_d  = Chem.Heat_rates_HeI_d;
   Chem.H.photo_heat_HeII_rate_d = Chem.Heat_rates_HeII_d;
-  
-  chprintf( "Allocating Memory. \n\n");
-  int n_cells = H.nx * H.ny * H.nz;
-  Chem.Fields.temperature_h = (Real *) malloc(n_cells * sizeof(Real));
-  
-  chprintf( "Chemistry Solver Successfully Initialized. \n\n");
 
+  chprintf("Allocating Memory. \n\n");
+  int n_cells               = H.nx * H.ny * H.nz;
+  Chem.Fields.temperature_h = (Real *)malloc(n_cells * sizeof(Real));
+
+  chprintf("Chemistry Solver Successfully Initialized. \n\n");
 }
 
+void Chem_GPU::Generate_Reaction_Rate_Table(Real **rate_table_array_d, Rate_Function_T rate_function, Real units)
+{
+  // Host array for storing the rates
+  Real *rate_table_array_h = (Real *)malloc(H.N_Temp_bins * sizeof(Real));
 
-void Chem_GPU::Generate_Reaction_Rate_Table( Real **rate_table_array_d, Rate_Function_T rate_function, Real units  ){
-  
-  // Host array for storing the rates 
-  Real *rate_table_array_h = (Real *) malloc( H.N_Temp_bins * sizeof(Real) );
-  
-  //Get the temperature spacing.
+  // Get the temperature spacing.
   Real T, logT, logT_start, d_logT;
   logT_start = log(H.Temp_start);
-  d_logT     = ( log(H.Temp_end) - logT_start ) / ( H.N_Temp_bins - 1 );
-  
+  d_logT     = (log(H.Temp_end) - logT_start) / (H.N_Temp_bins - 1);
+
   // Evaluate the rate at each temperature.
-  for (int i=0; i<H.N_Temp_bins; i++){
+  for (int i = 0; i < H.N_Temp_bins; i++) {
     rate_table_array_h[i] = CHEM_TINY;
-    logT = logT_start + i*d_logT;
-    T = exp(logT);
-    rate_table_array_h[i] = rate_function( T, units );
+    logT                  = logT_start + i * d_logT;
+    T                     = exp(logT);
+    rate_table_array_h[i] = rate_function(T, units);
   }
-  
+
   // Allocate the device array for the rate and copy from host
-  Allocate_Array_GPU_Real( rate_table_array_d, H.N_Temp_bins );
-  Copy_Real_Array_to_Device( H.N_Temp_bins, rate_table_array_h, *rate_table_array_d );
-  
+  Allocate_Array_GPU_Real(rate_table_array_d, H.N_Temp_bins);
+  Copy_Real_Array_to_Device(H.N_Temp_bins, rate_table_array_h, *rate_table_array_d);
+
   // Free the host array
-  free( rate_table_array_h );
-  
+  free(rate_table_array_h);
 }
 
-
-void Chem_GPU::Initialize( struct parameters *P ){
-  
+void Chem_GPU::Initialize(struct Parameters *P)
+{
   Initialize_Cooling_Rates();
-  
+
   Initialize_Reaction_Rates();
 
-  Initialize_UVB_Ionization_and_Heating_Rates( P );
-  
-  
-  
+  Initialize_UVB_Ionization_and_Heating_Rates(P);
 }
 
-void Chem_GPU::Initialize_Cooling_Rates( ){
-  
-  chprintf( " Initializing Cooling Rates... \n");
+void Chem_GPU::Initialize_Cooling_Rates()
+{
+  chprintf(" Initializing Cooling Rates... \n");
   Real units = H.cooling_units;
-  Generate_Reaction_Rate_Table( &H.cool_ceHI_d,   cool_ceHI_rate,   units );
-  Generate_Reaction_Rate_Table( &H.cool_ceHeI_d,  cool_ceHeI_rate,  units );
-  Generate_Reaction_Rate_Table( &H.cool_ceHeII_d, cool_ceHeII_rate, units );
-  
-  Generate_Reaction_Rate_Table( &H.cool_ciHI_d,   cool_ciHI_rate,   units );
-  Generate_Reaction_Rate_Table( &H.cool_ciHeI_d,  cool_ciHeI_rate,  units );
-  Generate_Reaction_Rate_Table( &H.cool_ciHeII_d, cool_ciHeII_rate, units );
-  Generate_Reaction_Rate_Table( &H.cool_ciHeIS_d, cool_ciHeIS_rate, units );
-  
-  if ( ! use_case_B_recombination ){
-    Generate_Reaction_Rate_Table( &H.cool_reHII_d,   cool_reHII_rate_case_A,   units );
-    Generate_Reaction_Rate_Table( &H.cool_reHeII1_d, cool_reHeII1_rate_case_A, units ); 
-    Generate_Reaction_Rate_Table( &H.cool_reHeIII_d, cool_reHeIII_rate_case_A, units );
+  Generate_Reaction_Rate_Table(&H.cool_ceHI_d, cool_ceHI_rate, units);
+  Generate_Reaction_Rate_Table(&H.cool_ceHeI_d, cool_ceHeI_rate, units);
+  Generate_Reaction_Rate_Table(&H.cool_ceHeII_d, cool_ceHeII_rate, units);
+
+  Generate_Reaction_Rate_Table(&H.cool_ciHI_d, cool_ciHI_rate, units);
+  Generate_Reaction_Rate_Table(&H.cool_ciHeI_d, cool_ciHeI_rate, units);
+  Generate_Reaction_Rate_Table(&H.cool_ciHeII_d, cool_ciHeII_rate, units);
+  Generate_Reaction_Rate_Table(&H.cool_ciHeIS_d, cool_ciHeIS_rate, units);
+
+  if (!use_case_B_recombination) {
+    Generate_Reaction_Rate_Table(&H.cool_reHII_d, cool_reHII_rate_case_A, units);
+    Generate_Reaction_Rate_Table(&H.cool_reHeII_1_d, cool_reHeII1_rate_case_A, units);
+    Generate_Reaction_Rate_Table(&H.cool_reHeIII_d, cool_reHeIII_rate_case_A, units);
   } else {
-    Generate_Reaction_Rate_Table( &H.cool_reHII_d,   cool_reHII_rate_case_B,   units );
-    Generate_Reaction_Rate_Table( &H.cool_reHeII1_d, cool_reHeII1_rate_case_B, units );
-    Generate_Reaction_Rate_Table( &H.cool_reHeIII_d, cool_reHeIII_rate_case_B, units );
+    Generate_Reaction_Rate_Table(&H.cool_reHII_d, cool_reHII_rate_case_B, units);
+    Generate_Reaction_Rate_Table(&H.cool_reHeII_1_d, cool_reHeII1_rate_case_B, units);
+    Generate_Reaction_Rate_Table(&H.cool_reHeIII_d, cool_reHeIII_rate_case_B, units);
   }
-  Generate_Reaction_Rate_Table( &H.cool_reHeII2_d, cool_reHeII2_rate, units );
-  
-  Generate_Reaction_Rate_Table( &H.cool_brem_d, cool_brem_rate, units );
-  
+  Generate_Reaction_Rate_Table(&H.cool_reHeII_2_d, cool_reHeII2_rate, units);
+
+  Generate_Reaction_Rate_Table(&H.cool_brem_d, cool_brem_rate, units);
+
   H.cool_compton = 5.65e-36 / units;
-    
 }
 
-void Chem_GPU::Initialize_Reaction_Rates(){
-  
-  chprintf( " Initializing Reaction Rates... \n");
+void Chem_GPU::Initialize_Reaction_Rates()
+{
+  chprintf(" Initializing Reaction Rates... \n");
   Real units = H.reaction_units;
-  Generate_Reaction_Rate_Table( &H.k_coll_i_HI_d,      coll_i_HI_rate,     units );
-  Generate_Reaction_Rate_Table( &H.k_coll_i_HeI_d,     coll_i_HeI_rate,    units );
-  Generate_Reaction_Rate_Table( &H.k_coll_i_HeII_d,    coll_i_HeII_rate,   units );
-  Generate_Reaction_Rate_Table( &H.k_coll_i_HI_HI_d,   coll_i_HI_HI_rate,  units );
-  Generate_Reaction_Rate_Table( &H.k_coll_i_HI_HeI_d,  coll_i_HI_HeI_rate, units );
-  
-  if ( ! use_case_B_recombination ){  
-    Generate_Reaction_Rate_Table( &H.k_recomb_HII_d,   recomb_HII_rate_case_A,   units );
-    Generate_Reaction_Rate_Table( &H.k_recomb_HeII_d,  recomb_HeII_rate_case_A,  units );
-    Generate_Reaction_Rate_Table( &H.k_recomb_HeIII_d, recomb_HeIII_rate_case_A, units );  
+  Generate_Reaction_Rate_Table(&H.k_coll_i_HI_d, coll_i_HI_rate, units);
+  Generate_Reaction_Rate_Table(&H.k_coll_i_HeI_d, coll_i_HeI_rate, units);
+  Generate_Reaction_Rate_Table(&H.k_coll_i_HeII_d, coll_i_HeII_rate, units);
+  Generate_Reaction_Rate_Table(&H.k_coll_i_HI_HI_d, coll_i_HI_HI_rate, units);
+  Generate_Reaction_Rate_Table(&H.k_coll_i_HI_HeI_d, coll_i_HI_HeI_rate, units);
+
+  if (!use_case_B_recombination) {
+    Generate_Reaction_Rate_Table(&H.k_recomb_HII_d, recomb_HII_rate_case_A, units);
+    Generate_Reaction_Rate_Table(&H.k_recomb_HeII_d, recomb_HeII_rate_case_A, units);
+    Generate_Reaction_Rate_Table(&H.k_recomb_HeIII_d, recomb_HeIII_rate_case_A, units);
   } else {
-    Generate_Reaction_Rate_Table( &H.k_recomb_HII_d,   recomb_HII_rate_case_B,   units );
-    Generate_Reaction_Rate_Table( &H.k_recomb_HeII_d,  recomb_HeII_rate_case_B,  units );
-    Generate_Reaction_Rate_Table( &H.k_recomb_HeIII_d, recomb_HeIII_rate_case_B, units );
+    Generate_Reaction_Rate_Table(&H.k_recomb_HII_d, recomb_HII_rate_case_B, units);
+    Generate_Reaction_Rate_Table(&H.k_recomb_HeII_d, recomb_HeII_rate_case_B, units);
+    Generate_Reaction_Rate_Table(&H.k_recomb_HeIII_d, recomb_HeIII_rate_case_B, units);
   }
 }
 
-void Chem_GPU::Initialize_UVB_Ionization_and_Heating_Rates( struct parameters *P ){
-  
-  
-  chprintf( " Initializing UVB Rates... \n");
-  Load_UVB_Ionization_and_Heating_Rates( P );
-  
-  Copy_UVB_Rates_to_GPU( );
-  
+void Chem_GPU::Initialize_UVB_Ionization_and_Heating_Rates(struct Parameters *P)
+{
+  chprintf(" Initializing UVB Rates... \n");
+  Load_UVB_Ionization_and_Heating_Rates(P);
+
+  Copy_UVB_Rates_to_GPU();
+
   #ifdef TEXTURES_UVB_INTERPOLATION
-  Bind_GPU_Textures( n_uvb_rates_samples, Heat_rates_HI_h, Heat_rates_HeI_h, Heat_rates_HeII_h, Ion_rates_HI_h, Ion_rates_HeI_h, Ion_rates_HeII_h);
+  Bind_GPU_Textures(n_uvb_rates_samples, Heat_rates_HI_h, Heat_rates_HeI_h, Heat_rates_HeII_h, Ion_rates_HI_h,
+                    Ion_rates_HeI_h, Ion_rates_HeII_h);
   #endif
 }
 
-void Chem_GPU::Copy_UVB_Rates_to_GPU( ){
-  
-  Allocate_Array_GPU_float( &rates_z_d,         n_uvb_rates_samples );
-  Allocate_Array_GPU_float( &Heat_rates_HI_d,   n_uvb_rates_samples );
-  Allocate_Array_GPU_float( &Heat_rates_HeI_d,  n_uvb_rates_samples );
-  Allocate_Array_GPU_float( &Heat_rates_HeII_d, n_uvb_rates_samples );
-  Allocate_Array_GPU_float( &Ion_rates_HI_d,    n_uvb_rates_samples );
-  Allocate_Array_GPU_float( &Ion_rates_HeI_d,   n_uvb_rates_samples );
-  Allocate_Array_GPU_float( &Ion_rates_HeII_d,  n_uvb_rates_samples );
-  
-  Copy_Float_Array_to_Device( n_uvb_rates_samples, rates_z_h, rates_z_d );
-  Copy_Float_Array_to_Device( n_uvb_rates_samples, Heat_rates_HI_h, Heat_rates_HI_d );
-  Copy_Float_Array_to_Device( n_uvb_rates_samples, Heat_rates_HeI_h, Heat_rates_HeI_d );
-  Copy_Float_Array_to_Device( n_uvb_rates_samples, Heat_rates_HeII_h, Heat_rates_HeII_d );
-  Copy_Float_Array_to_Device( n_uvb_rates_samples, Ion_rates_HI_h, Ion_rates_HI_d );
-  Copy_Float_Array_to_Device( n_uvb_rates_samples, Ion_rates_HeI_h, Ion_rates_HeI_d );
-  Copy_Float_Array_to_Device( n_uvb_rates_samples, Ion_rates_HeII_h, Ion_rates_HeII_d );
-  
+void Chem_GPU::Copy_UVB_Rates_to_GPU()
+{
+  Allocate_Array_GPU_float(&rates_z_d, n_uvb_rates_samples);
+  Allocate_Array_GPU_float(&Heat_rates_HI_d, n_uvb_rates_samples);
+  Allocate_Array_GPU_float(&Heat_rates_HeI_d, n_uvb_rates_samples);
+  Allocate_Array_GPU_float(&Heat_rates_HeII_d, n_uvb_rates_samples);
+  Allocate_Array_GPU_float(&Ion_rates_HI_d, n_uvb_rates_samples);
+  Allocate_Array_GPU_float(&Ion_rates_HeI_d, n_uvb_rates_samples);
+  Allocate_Array_GPU_float(&Ion_rates_HeII_d, n_uvb_rates_samples);
+
+  Copy_Float_Array_to_Device(n_uvb_rates_samples, rates_z_h, rates_z_d);
+  Copy_Float_Array_to_Device(n_uvb_rates_samples, Heat_rates_HI_h, Heat_rates_HI_d);
+  Copy_Float_Array_to_Device(n_uvb_rates_samples, Heat_rates_HeI_h, Heat_rates_HeI_d);
+  Copy_Float_Array_to_Device(n_uvb_rates_samples, Heat_rates_HeII_h, Heat_rates_HeII_d);
+  Copy_Float_Array_to_Device(n_uvb_rates_samples, Ion_rates_HI_h, Ion_rates_HI_d);
+  Copy_Float_Array_to_Device(n_uvb_rates_samples, Ion_rates_HeI_h, Ion_rates_HeI_d);
+  Copy_Float_Array_to_Device(n_uvb_rates_samples, Ion_rates_HeII_h, Ion_rates_HeII_d);
 }
 
-void Grid3D::Update_Chemistry(){
-  
+void Grid3D::Update_Chemistry()
+{
   #ifdef COSMOLOGY
   Chem.H.current_z = Cosmo.current_z;
   #else
   Chem.H.current_z = 0;
   #endif
-  
-  
-  Do_Chemistry_Update( C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, Chem.H );
-  
-}
-
 
+  Do_Chemistry_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, Chem.H);
+}
 
-void Grid3D::Compute_Gas_Temperature(  Real *temperature, bool convert_cosmo_units ){
-  
+void Grid3D::Compute_Gas_Temperature(Real *temperature, bool convert_cosmo_units)
+{
   int k, j, i, id;
   Real dens_HI, dens_HII, dens_HeI, dens_HeII, dens_HeIII, dens_e, gamma;
-  Real d, vx, vy, vz, E, GE, mu, temp, cell_dens, cell_n; 
+  Real d, vx, vy, vz, E, GE, mu, temp, cell_dens, cell_n;
   Real current_a, a2;
   gamma = gama;
-  
-    
-  for (k=0; k<H.nz; k++) {
-    for (j=0; j<H.ny; j++) {
-      for (i=0; i<H.nx; i++) {
-        id = i + j*H.nx + k*H.nx*H.ny;
-        
-        d  =  C.density[id];
-        vx =  C.momentum_x[id] / d;
-        vy =  C.momentum_y[id] / d;
-        vz =  C.momentum_z[id] / d;
-        E = C.Energy[id];
-
-        #ifdef DE
+
+  for (k = 0; k < H.nz; k++) {
+    for (j = 0; j < H.ny; j++) {
+      for (i = 0; i < H.nx; i++) {
+        id = i + j * H.nx + k * H.nx * H.ny;
+
+        d  = C.density[id];
+        vx = C.momentum_x[id] / d;
+        vy = C.momentum_y[id] / d;
+        vz = C.momentum_z[id] / d;
+        E  = C.Energy[id];
+
+  #ifdef DE
         GE = C.GasEnergy[id];
-        #else 
-        GE = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz));
-        #endif
-        
+  #else
+        GE = E - hydro_utilities::Calc_Kinetic_Energy_From_Velocity(d, vx, vy, vz);
+    #ifdef MHD
+        GE -= mhd::utils::computeMagneticEnergy(C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]);
+    #endif  // MHD
+  #endif
+
         dens_HI    = C.HI_density[id];
         dens_HII   = C.HII_density[id];
         dens_HeI   = C.HeI_density[id];
         dens_HeII  = C.HeII_density[id];
-        dens_HeIII = C.HeIII_density[id]; 
+        dens_HeIII = C.HeIII_density[id];
         dens_e     = C.e_density[id];
-        
+
         cell_dens = dens_HI + dens_HII + dens_HeI + dens_HeII + dens_HeIII;
-        cell_n =  dens_HI + dens_HII + ( dens_HeI + dens_HeII + dens_HeIII )/4 + dens_e;
-        mu = cell_dens / cell_n;
-        
-        #ifdef COSMOLOGY
-        if ( convert_cosmo_units ){
+        cell_n    = dens_HI + dens_HII + (dens_HeI + dens_HeII + dens_HeIII) / 4 + dens_e;
+        mu        = cell_dens / cell_n;
+
+  #ifdef COSMOLOGY
+        if (convert_cosmo_units) {
           current_a = Cosmo.current_a;
-          a2 = current_a * current_a;
-          GE *= Chem.H.energy_conversion / a2; 
+          a2        = current_a * current_a;
+          GE *= Chem.H.energy_conversion / a2;
         } else {
-          GE *= 1e10; // convert from (km/s)^2 to (cm/s)^2
+          GE *= 1e10;  // convert from (km/s)^2 to (cm/s)^2
         }
-        #endif
-        
-        temp = GE * MP  * mu / d / KB * (gamma - 1.0);  ;
+  #endif
+
+        temp = GE * MP * mu / d / KB * (gamma - 1.0);
+        ;
         temperature[id] = temp;
         // chprintf( "mu: %e \n", mu );
-        // if ( temp > 1e7 ) chprintf( "Temperature: %e   mu: %e \n", temp, mu );  
-        
+        // if ( temp > 1e7 ) chprintf( "Temperature: %e   mu: %e \n", temp, mu
+        // );
       }
-    }  
-  } 
-  
+    }
+  }
 }
 
-void Chem_GPU::Reset(){
-  
-  free( rates_z_h );
-  free( Heat_rates_HI_h );
-  free( Heat_rates_HeI_h );
-  free( Heat_rates_HeII_h );
-  free( Ion_rates_HI_h );
-  free( Ion_rates_HeI_h );
-  free( Ion_rates_HeII_h );
-  
-  Free_Array_GPU_float( rates_z_d );
-  Free_Array_GPU_float( Heat_rates_HI_d );
-  Free_Array_GPU_float( Heat_rates_HeI_d );
-  Free_Array_GPU_float( Heat_rates_HeII_d );
-  Free_Array_GPU_float( Ion_rates_HI_d );
-  Free_Array_GPU_float( Ion_rates_HeI_d );
-  Free_Array_GPU_float( Ion_rates_HeII_d );
-  
-  free( Fields.temperature_h );
-  
+void Chem_GPU::Reset()
+{
+  free(rates_z_h);
+  free(Heat_rates_HI_h);
+  free(Heat_rates_HeI_h);
+  free(Heat_rates_HeII_h);
+  free(Ion_rates_HI_h);
+  free(Ion_rates_HeI_h);
+  free(Ion_rates_HeII_h);
+
+  Free_Array_GPU_float(rates_z_d);
+  Free_Array_GPU_float(Heat_rates_HI_d);
+  Free_Array_GPU_float(Heat_rates_HeI_d);
+  Free_Array_GPU_float(Heat_rates_HeII_d);
+  Free_Array_GPU_float(Ion_rates_HI_d);
+  Free_Array_GPU_float(Ion_rates_HeI_d);
+  Free_Array_GPU_float(Ion_rates_HeII_d);
+
+  free(Fields.temperature_h);
 }
 
-
-
-
-
-
-
-
-
-
-
-#endif
\ No newline at end of file
+#endif
diff --git a/src/chemistry_gpu/chemistry_functions_gpu.cu b/src/chemistry_gpu/chemistry_functions_gpu.cu
index 3d6e0052f..7c9bfe2cf 100644
--- a/src/chemistry_gpu/chemistry_functions_gpu.cu
+++ b/src/chemistry_gpu/chemistry_functions_gpu.cu
@@ -1,497 +1,518 @@
 #ifdef CHEMISTRY_GPU
 
-#include "chemistry_gpu.h"
-#include "../hydro/hydro_cuda.h"
-#include "../global/global_cuda.h"
-#include "../io/io.h"
-#include "rates.cuh"
-#include "rates_Katz95.cuh"
-
-#define eV_to_K 1.160451812e4
-#define K_to_eV 8.617333263e-5
-#define n_min 1e-20
-#define tiny  1e-20
-
-#define TPB_CHEM 256
-
-void Chem_GPU::Allocate_Array_GPU_float( float **array_dev, int size ){
-cudaMalloc( (void**)array_dev, size*sizeof(float));
-CudaCheckError();
-}
+  #include "../global/global_cuda.h"
+  #include "../grid/grid_enum.h"
+  #include "../hydro/hydro_cuda.h"
+  #include "../io/io.h"
+  #include "chemistry_gpu.h"
+  #include "rates.cuh"
+  #include "rates_Katz95.cuh"
 
-void Chem_GPU::Copy_Float_Array_to_Device( int size, float *array_h, float *array_d ){
-CudaSafeCall( cudaMemcpy(array_d, array_h, size*sizeof(float), cudaMemcpyHostToDevice ) );
-cudaDeviceSynchronize();
-}
+  #define eV_to_K 1.160451812e4
+  #define K_to_eV 8.617333263e-5
+  #define n_min   1e-20
+  #define tiny    1e-20
 
-void Chem_GPU::Free_Array_GPU_float( float *array_dev ){
-cudaFree( array_dev );
-CudaCheckError();
-}
+  #define TPB_CHEM 256
 
-void Chem_GPU::Allocate_Array_GPU_Real( Real **array_dev, int size ){
-cudaMalloc( (void**)array_dev, size*sizeof(Real));
-CudaCheckError();
+void Chem_GPU::Allocate_Array_GPU_float(float **array_dev, int size)
+{
+  GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(float)));
 }
 
-void Chem_GPU::Copy_Real_Array_to_Device( int size, Real *array_h, Real *array_d ){
-CudaSafeCall( cudaMemcpy(array_d, array_h, size*sizeof(Real), cudaMemcpyHostToDevice ) );
-cudaDeviceSynchronize();
+void Chem_GPU::Copy_Float_Array_to_Device(int size, float *array_h, float *array_d)
+{
+  GPU_Error_Check(cudaMemcpy(array_d, array_h, size * sizeof(float), cudaMemcpyHostToDevice));
+  cudaDeviceSynchronize();
 }
 
-void Chem_GPU::Free_Array_GPU_Real( Real *array_dev ){
-cudaFree( array_dev );
-CudaCheckError();
-}
+void Chem_GPU::Free_Array_GPU_float(float *array_dev) { GPU_Error_Check(cudaFree(array_dev)); }
 
-class Thermal_State{
-public: 
-
-Real U;
-Real d;
-Real d_HI;
-Real d_HII;
-Real d_HeI;
-Real d_HeII;
-Real d_HeIII;
-Real d_e;
-
-// Constructor
-__host__ __device__ Thermal_State( Real U_0=1, Real d_0=1, Real d_HI_0=1, Real d_HII_0=0, Real d_HeI_0=1, Real d_HeII_0=0, Real d_HeIII_0=1, Real d_e_0=0    ) :  U(U_0), d(d_0), d_HI(d_HI_0), d_HII(d_HII_0), d_HeI(d_HeI_0), d_HeII(d_HeII_0), d_HeIII(d_HeIII_0), d_e(d_e_0) {}
-
-__host__ __device__ Real get_MMW( ){
-  // Real m_tot = d_HI + d_HII + d_HeI + d_HeII + d_HeIII;
-  Real n_tot =  d_HI + d_HII + 0.25 * ( d_HeI + d_HeII + d_HeIII ) + d_e;  
-  return d / n_tot;
-  // return m_tot / n_tot;
+void Chem_GPU::Allocate_Array_GPU_Real(Real **array_dev, int size)
+{
+  GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(Real)));
+  GPU_Error_Check();
 }
 
-__host__ __device__ Real get_temperature( Real gamma ){
-  Real mu, temp;
-  mu = get_MMW();
-  temp = (gamma - 1) * mu * U * MP / KB * 1e10;
-  return temp;
+void Chem_GPU::Copy_Real_Array_to_Device(int size, Real *array_h, Real *array_d)
+{
+  GPU_Error_Check(cudaMemcpy(array_d, array_h, size * sizeof(Real), cudaMemcpyHostToDevice));
+  cudaDeviceSynchronize();
 }
 
-__host__ __device__ Real compute_U( Real temp, Real gamma ){
-  Real mu, U_local;
-  mu = get_MMW();
-  U_local = temp / ( gamma - 1 ) / mu / MP * KB / 1e10;
-  return U_local;
+void Chem_GPU::Free_Array_GPU_Real(Real *array_dev)
+{
+  GPU_Error_Check(cudaFree(array_dev));
+  GPU_Error_Check();
 }
 
+class Thermal_State
+{
+ public:
+  Real U;
+  Real d;
+  Real d_HI;
+  Real d_HII;
+  Real d_HeI;
+  Real d_HeII;
+  Real d_HeIII;
+  Real d_e;
+
+  // Constructor
+  __host__ __device__ Thermal_State(Real U_0 = 1, Real d_0 = 1, Real d_HI_0 = 1, Real d_HII_0 = 0, Real d_HeI_0 = 1,
+                                    Real d_HeII_0 = 0, Real d_HeIII_0 = 1, Real d_e_0 = 0)
+      : U(U_0), d(d_0), d_HI(d_HI_0), d_HII(d_HII_0), d_HeI(d_HeI_0), d_HeII(d_HeII_0), d_HeIII(d_HeIII_0), d_e(d_e_0)
+  {
+  }
+
+  __host__ __device__ Real get_MMW()
+  {
+    // Real m_tot = d_HI + d_HII + d_HeI + d_HeII + d_HeIII;
+    Real n_tot = d_HI + d_HII + 0.25 * (d_HeI + d_HeII + d_HeIII) + d_e;
+    return d / n_tot;
+    // return m_tot / n_tot;
+  }
+
+  __host__ __device__ Real get_temperature(Real gamma)
+  {
+    Real mu, temp;
+    mu   = get_MMW();
+    temp = (gamma - 1) * mu * U * MP / KB * 1e10;
+    return temp;
+  }
+
+  __host__ __device__ Real compute_U(Real temp, Real gamma)
+  {
+    Real mu, U_local;
+    mu      = get_MMW();
+    U_local = temp / (gamma - 1) / mu / MP * KB / 1e10;
+    return U_local;
+  }
 };
 
-__device__ void get_temperature_indx( Real T, Chemistry_Header &Chem_H, int &temp_indx, Real &delta_T, Real temp_old, bool print ){
-  
+__device__ void get_temperature_indx(Real T, Chemistry_Header &Chem_H, int &temp_indx, Real &delta_T, Real temp_old,
+                                     bool print)
+{
   Real logT, logT_start, d_logT, logT_l, logT_r;
-  logT = log( 0.5 * ( T + temp_old ) );
-  logT_start = log( Chem_H.Temp_start );
-  logT = fmax( logT_start, logT );
-  logT = fmin( log( Chem_H.Temp_end ), logT ); 
-  d_logT     = ( log( Chem_H.Temp_end ) - logT_start ) / ( Chem_H.N_Temp_bins - 1 );
-  temp_indx = (int) floor( (logT - logT_start) / d_logT );
-  temp_indx = max( 0, temp_indx );
-  temp_indx = min( Chem_H.N_Temp_bins-2, temp_indx );
-  logT_l = logT_start + temp_indx * d_logT;
-  logT_r = logT_start + (temp_indx+1) * d_logT;
-  delta_T = ( logT - logT_l ) / ( logT_r - logT_l );
-  // if (print) printf(" logT_start: %f  logT_end: %f  d_logT: %f   \n", logT_start, log( Chem_H.Temp_end ), d_logT );
-  // if (print) printf(" logT: %f  logT_l: %f  logT_r: %f   \n", logT, logT_l, logT_r );
-   
-}
-
-__device__ Real interpolate_rate( Real *rate_table, int indx, Real delta ){
-  
+  logT       = log(0.5 * (T + temp_old));
+  logT_start = log(Chem_H.Temp_start);
+  logT       = fmax(logT_start, logT);
+  logT       = fmin(log(Chem_H.Temp_end), logT);
+  d_logT     = (log(Chem_H.Temp_end) - logT_start) / (Chem_H.N_Temp_bins - 1);
+  temp_indx  = (int)floor((logT - logT_start) / d_logT);
+  temp_indx  = max(0, temp_indx);
+  temp_indx  = min(Chem_H.N_Temp_bins - 2, temp_indx);
+  logT_l     = logT_start + temp_indx * d_logT;
+  logT_r     = logT_start + (temp_indx + 1) * d_logT;
+  delta_T    = (logT - logT_l) / (logT_r - logT_l);
+  // if (print) printf(" logT_start: %f  logT_end: %f  d_logT: %f   \n",
+  // logT_start, log( Chem_H.Temp_end ), d_logT ); if (print) printf(" logT: %f
+  // logT_l: %f  logT_r: %f   \n", logT, logT_l, logT_r );
+}
+
+__device__ Real interpolate_rate(Real *rate_table, int indx, Real delta)
+{
   Real rate_val;
   rate_val = rate_table[indx];
-  rate_val = rate_val + delta * ( rate_table[indx+1] - rate_val  );
+  rate_val = rate_val + delta * (rate_table[indx + 1] - rate_val);
   return rate_val;
 }
 
-__device__ Real Get_Cooling_Rates( Thermal_State &TS, Chemistry_Header &Chem_H, Real dens_number_conv, Real current_z, Real temp_prev,
-                                   float photo_h_HI, float photo_h_HeI, float photo_h_HeII, bool print ){
-  
+__device__ Real Get_Cooling_Rates(Thermal_State &TS, Chemistry_Header &Chem_H, Real dens_number_conv, Real current_z,
+                                  Real temp_prev, float photo_h_HI, float photo_h_HeI, float photo_h_HeII, bool print)
+{
   int temp_indx;
   Real temp, delta_T, U_dot;
-  temp = TS.get_temperature( Chem_H.gamma );  
-  get_temperature_indx( temp, Chem_H, temp_indx, delta_T, temp_prev, print );
-  if (print) printf("mu: %f  temp: %f  temp_indx: %d  delta_T: %f  \n", TS.get_MMW(), temp, temp_indx, delta_T );                                 
+  temp = TS.get_temperature(Chem_H.gamma);
+  get_temperature_indx(temp, Chem_H, temp_indx, delta_T, temp_prev, print);
+  if (print) printf("mu: %f  temp: %f  temp_indx: %d  delta_T: %f  \n", TS.get_MMW(), temp, temp_indx, delta_T);
   U_dot = 0.0;
-  
+
   // Collisional excitation cooling
-  Real cool_ceHI, cool_ceHeI, cool_ceHeII; 
-  cool_ceHI   = interpolate_rate( Chem_H.cool_ceHI_d,   temp_indx, delta_T ) * TS.d_HI   * TS.d_e;
-  cool_ceHeI  = interpolate_rate( Chem_H.cool_ceHeI_d,  temp_indx, delta_T ) * TS.d_HeII * TS.d_e * TS.d_e * dens_number_conv / 4.0 ;
-  cool_ceHeII = interpolate_rate( Chem_H.cool_ceHeII_d, temp_indx, delta_T ) * TS.d_HeII * TS.d_e / 4.0;
+  Real cool_ceHI, cool_ceHeI, cool_ceHeII;
+  cool_ceHI = interpolate_rate(Chem_H.cool_ceHI_d, temp_indx, delta_T) * TS.d_HI * TS.d_e;
+  cool_ceHeI =
+      interpolate_rate(Chem_H.cool_ceHeI_d, temp_indx, delta_T) * TS.d_HeII * TS.d_e * TS.d_e * dens_number_conv / 4.0;
+  cool_ceHeII = interpolate_rate(Chem_H.cool_ceHeII_d, temp_indx, delta_T) * TS.d_HeII * TS.d_e / 4.0;
   U_dot -= cool_ceHI + cool_ceHeI + cool_ceHeII;
-  
+
   // Collisional excitation cooling
   Real cool_ciHI, cool_ciHeI, cool_ciHeII, cool_ciHeIS;
-  cool_ciHI   = interpolate_rate( Chem_H.cool_ciHI_d,   temp_indx, delta_T ) * TS.d_HI   * TS.d_e; 
-  cool_ciHeI  = interpolate_rate( Chem_H.cool_ciHeI_d,  temp_indx, delta_T ) * TS.d_HeI  * TS.d_e / 4.0;
-  cool_ciHeII = interpolate_rate( Chem_H.cool_ciHeII_d, temp_indx, delta_T ) * TS.d_HeII * TS.d_e / 4.0;
-  cool_ciHeIS = interpolate_rate( Chem_H.cool_ciHeIS_d, temp_indx, delta_T ) * TS.d_HeII * TS.d_e * TS.d_e * dens_number_conv / 4.0;
+  cool_ciHI   = interpolate_rate(Chem_H.cool_ciHI_d, temp_indx, delta_T) * TS.d_HI * TS.d_e;
+  cool_ciHeI  = interpolate_rate(Chem_H.cool_ciHeI_d, temp_indx, delta_T) * TS.d_HeI * TS.d_e / 4.0;
+  cool_ciHeII = interpolate_rate(Chem_H.cool_ciHeII_d, temp_indx, delta_T) * TS.d_HeII * TS.d_e / 4.0;
+  cool_ciHeIS =
+      interpolate_rate(Chem_H.cool_ciHeIS_d, temp_indx, delta_T) * TS.d_HeII * TS.d_e * TS.d_e * dens_number_conv / 4.0;
   U_dot -= cool_ciHI + cool_ciHeI + cool_ciHeII + cool_ciHeIS;
-  
+
   // Recombination cooling
   Real cool_reHII, cool_reHeII1, cool_reHeII2, cool_reHeIII;
-  cool_reHII   = interpolate_rate( Chem_H.cool_reHII_d,     temp_indx, delta_T ) * TS.d_HII   * TS.d_e; 
-  cool_reHeII1 = interpolate_rate( Chem_H.cool_reHeII1_d,   temp_indx, delta_T ) * TS.d_HeII  * TS.d_e / 4.0;
-  cool_reHeII2 = interpolate_rate( Chem_H.cool_reHeII2_d,   temp_indx, delta_T ) * TS.d_HeII  * TS.d_e / 4.0;
-  cool_reHeIII = interpolate_rate( Chem_H.cool_reHeIII_d,   temp_indx, delta_T ) * TS.d_HeIII * TS.d_e / 4.0;
+  cool_reHII   = interpolate_rate(Chem_H.cool_reHII_d, temp_indx, delta_T) * TS.d_HII * TS.d_e;
+  cool_reHeII1 = interpolate_rate(Chem_H.cool_reHeII_1_d, temp_indx, delta_T) * TS.d_HeII * TS.d_e / 4.0;
+  cool_reHeII2 = interpolate_rate(Chem_H.cool_reHeII_2_d, temp_indx, delta_T) * TS.d_HeII * TS.d_e / 4.0;
+  cool_reHeIII = interpolate_rate(Chem_H.cool_reHeIII_d, temp_indx, delta_T) * TS.d_HeIII * TS.d_e / 4.0;
   U_dot -= cool_reHII + cool_reHeII1 + cool_reHeII2 + cool_reHeIII;
-   
+
   // Bremsstrahlung cooling
   Real cool_brem;
-  cool_brem = interpolate_rate( Chem_H.cool_brem_d, temp_indx, delta_T ) * ( TS.d_HII +  TS.d_HeII/4.0 + TS.d_HeIII ) * TS.d_e;
+  cool_brem =
+      interpolate_rate(Chem_H.cool_brem_d, temp_indx, delta_T) * (TS.d_HII + TS.d_HeII / 4.0 + TS.d_HeIII) * TS.d_e;
   U_dot -= cool_brem;
-  
+
   // Compton cooling or heating
   Real cool_compton, temp_cmb;
-  temp_cmb = 2.73 * ( 1.0 + current_z );
-  cool_compton = Chem_H.cool_compton * pow(1.0 + current_z, 4) * ( temp - temp_cmb ) * TS.d_e / dens_number_conv;  
+  temp_cmb     = 2.73 * (1.0 + current_z);
+  cool_compton = Chem_H.cool_compton * pow(1.0 + current_z, 4) * (temp - temp_cmb) * TS.d_e / dens_number_conv;
   U_dot -= cool_compton;
-  
+
   // Phothoheating
   Real photo_heat;
-  photo_heat = ( photo_h_HI * TS.d_HI + 0.25 * ( photo_h_HeI * TS.d_HeI + photo_h_HeII * TS.d_HeII ) ) / dens_number_conv;  
+  photo_heat = (photo_h_HI * TS.d_HI + 0.25 * (photo_h_HeI * TS.d_HeI + photo_h_HeII * TS.d_HeII)) / dens_number_conv;
   U_dot += photo_heat;
-  
-  if ( temp <= 1.01* Chem_H.Temp_start && fabs( U_dot ) < 0 ) U_dot = tiny; 
-  if ( fabs(U_dot) < tiny ) U_dot = tiny;
-  
-  
-  if (print) printf("HI: %e  \n", TS.d_HI );
-  if (print) printf("HII: %e  \n", TS.d_HII );
-  if (print) printf("HeI: %e  \n", TS.d_HeI );
-  if (print) printf("HeII: %e  \n", TS.d_HeII );
-  if (print) printf("HeIII: %e  \n", TS.d_HeIII );
-  if (print) printf("de: %e  \n", TS.d_e ); 
-  if (print) printf("Cooling ceHI: %e  \n", cool_ceHI );
-  if (print) printf("Cooling ceHeI: %e   \n", cool_ceHeI );
-  if (print) printf("Cooling ceHeII: %e   \n", cool_ceHeII );
-  if (print) printf("Cooling ciHI: %e  \n", cool_ciHI );
-  if (print) printf("Cooling ciHeI: %e  \n", cool_ciHeI );
-  if (print) printf("Cooling ciHeII: %e  \n", cool_ciHeII );
-  if (print) printf("Cooling ciHeIS: %e  \n", cool_ciHeIS );
-  if (print) printf("Cooling reHII: %e  \n", cool_reHII );
-  if (print) printf("Cooling reHeII1: %e  \n", cool_reHeII1 );
-  if (print) printf("Cooling reHeII2: %e  \n", cool_reHeII2 );
-  if (print) printf("Cooling reHeIII: %e  \n", cool_reHeIII );
-  if (print) printf("Cooling brem: %e  \n", cool_brem );
-  if (print) printf("Cooling piHI: %e   rate: %e \n", photo_h_HI, photo_h_HI * TS.d_HI / dens_number_conv  );
-  if (print) printf("Cooling piHeI: %e  rate: %e \n", photo_h_HeI, photo_h_HeI *  TS.d_HeI / dens_number_conv * 0.25  );
-  if (print) printf("Cooling piHeII: %e rate: %e \n", photo_h_HeII, photo_h_HeII *  TS.d_HeII / dens_number_conv * 0.25);
-  if (print) printf("Cooling DOM: %e  \n", dens_number_conv );
-  if (print) printf("Cooling compton: %e  \n", cool_compton );
-  if (print) printf("Cooling U_dot: %e  \n", U_dot );
-  
+
+  if (temp <= 1.01 * Chem_H.Temp_start && fabs(U_dot) < 0) U_dot = tiny;
+  if (fabs(U_dot) < tiny) U_dot = tiny;
+
+  if (print) printf("HI: %e  \n", TS.d_HI);
+  if (print) printf("HII: %e  \n", TS.d_HII);
+  if (print) printf("HeI: %e  \n", TS.d_HeI);
+  if (print) printf("HeII: %e  \n", TS.d_HeII);
+  if (print) printf("HeIII: %e  \n", TS.d_HeIII);
+  if (print) printf("de: %e  \n", TS.d_e);
+  if (print) printf("Cooling ceHI: %e  \n", cool_ceHI);
+  if (print) printf("Cooling ceHeI: %e   \n", cool_ceHeI);
+  if (print) printf("Cooling ceHeII: %e   \n", cool_ceHeII);
+  if (print) printf("Cooling ciHI: %e  \n", cool_ciHI);
+  if (print) printf("Cooling ciHeI: %e  \n", cool_ciHeI);
+  if (print) printf("Cooling ciHeII: %e  \n", cool_ciHeII);
+  if (print) printf("Cooling ciHeIS: %e  \n", cool_ciHeIS);
+  if (print) printf("Cooling reHII: %e  \n", cool_reHII);
+  if (print) printf("Cooling reHeII1: %e  \n", cool_reHeII1);
+  if (print) printf("Cooling reHeII2: %e  \n", cool_reHeII2);
+  if (print) printf("Cooling reHeIII: %e  \n", cool_reHeIII);
+  if (print) printf("Cooling brem: %e  \n", cool_brem);
+  if (print) printf("Cooling piHI: %e   rate: %e \n", photo_h_HI, photo_h_HI * TS.d_HI / dens_number_conv);
+  if (print) printf("Cooling piHeI: %e  rate: %e \n", photo_h_HeI, photo_h_HeI * TS.d_HeI / dens_number_conv * 0.25);
+  if (print) printf("Cooling piHeII: %e rate: %e \n", photo_h_HeII, photo_h_HeII * TS.d_HeII / dens_number_conv * 0.25);
+  if (print) printf("Cooling DOM: %e  \n", dens_number_conv);
+  if (print) printf("Cooling compton: %e  \n", cool_compton);
+  if (print) printf("Cooling U_dot: %e  \n", U_dot);
 
   return U_dot;
-                                     
 }
 
-__device__ void Get_Reaction_Rates( Thermal_State &TS, Chemistry_Header &Chem_H, Real &k_coll_i_HI, Real &k_coll_i_HeI, Real &k_coll_i_HeII,
-                                    Real &k_coll_i_HI_HI, Real &k_coll_i_HI_HeI, Real &k_recomb_HII, Real &k_recomb_HeII, Real &k_recomb_HeIII, bool print  ){
-    
+__device__ void Get_Reaction_Rates(Thermal_State &TS, Chemistry_Header &Chem_H, Real &k_coll_i_HI, Real &k_coll_i_HeI,
+                                   Real &k_coll_i_HeII, Real &k_coll_i_HI_HI, Real &k_coll_i_HI_HeI, Real &k_recomb_HII,
+                                   Real &k_recomb_HeII, Real &k_recomb_HeIII, bool print)
+{
   int temp_indx;
   Real temp, delta_T;
-  temp = TS.get_temperature( Chem_H.gamma );  
-  get_temperature_indx( temp, Chem_H, temp_indx, delta_T, temp, print );
-  
-  k_coll_i_HI   = interpolate_rate( Chem_H.k_coll_i_HI_d,   temp_indx, delta_T );
-  k_coll_i_HeI  = interpolate_rate( Chem_H.k_coll_i_HeI_d,  temp_indx, delta_T );
-  k_coll_i_HeII = interpolate_rate( Chem_H.k_coll_i_HeII_d, temp_indx, delta_T );
-  
-  k_coll_i_HI_HI  = interpolate_rate( Chem_H.k_coll_i_HI_HI_d,  temp_indx, delta_T );
-  k_coll_i_HI_HeI = interpolate_rate( Chem_H.k_coll_i_HI_HeI_d, temp_indx, delta_T );
-  
-  k_recomb_HII   = interpolate_rate( Chem_H.k_recomb_HII_d,   temp_indx, delta_T );
-  k_recomb_HeII  = interpolate_rate( Chem_H.k_recomb_HeII_d,  temp_indx, delta_T );
-  k_recomb_HeIII = interpolate_rate( Chem_H.k_recomb_HeIII_d, temp_indx, delta_T );
-  
-  if (print) printf("logT: %f   temp_indx: %d\n", log(temp), temp_indx );
-  if (print) printf("k_coll_i_HI: %e \n", k_coll_i_HI );
-  if (print) printf("k_coll_i_HeI: %e \n", k_coll_i_HeI );
-  if (print) printf("k_coll_i_HeII: %e \n", k_coll_i_HeII );
-  if (print) printf("k_coll_i_HI_HI: %e \n", k_coll_i_HI_HI );
-  if (print) printf("k_coll_i_HI_HeI: %e \n", k_coll_i_HI_HeI );  
-  if (print) printf("k_recomb_HII: %e \n", k_recomb_HII );
-  if (print) printf("k_recomb_HeII: %e \n", k_recomb_HeII );
-  if (print) printf("k_recomb_HeIII: %e \n", k_recomb_HeIII );
-  
+  temp = TS.get_temperature(Chem_H.gamma);
+  get_temperature_indx(temp, Chem_H, temp_indx, delta_T, temp, print);
+
+  k_coll_i_HI   = interpolate_rate(Chem_H.k_coll_i_HI_d, temp_indx, delta_T);
+  k_coll_i_HeI  = interpolate_rate(Chem_H.k_coll_i_HeI_d, temp_indx, delta_T);
+  k_coll_i_HeII = interpolate_rate(Chem_H.k_coll_i_HeII_d, temp_indx, delta_T);
+
+  k_coll_i_HI_HI  = interpolate_rate(Chem_H.k_coll_i_HI_HI_d, temp_indx, delta_T);
+  k_coll_i_HI_HeI = interpolate_rate(Chem_H.k_coll_i_HI_HeI_d, temp_indx, delta_T);
+
+  k_recomb_HII   = interpolate_rate(Chem_H.k_recomb_HII_d, temp_indx, delta_T);
+  k_recomb_HeII  = interpolate_rate(Chem_H.k_recomb_HeII_d, temp_indx, delta_T);
+  k_recomb_HeIII = interpolate_rate(Chem_H.k_recomb_HeIII_d, temp_indx, delta_T);
+
+  if (print) printf("logT: %f   temp_indx: %d\n", log(temp), temp_indx);
+  if (print) printf("k_coll_i_HI: %e \n", k_coll_i_HI);
+  if (print) printf("k_coll_i_HeI: %e \n", k_coll_i_HeI);
+  if (print) printf("k_coll_i_HeII: %e \n", k_coll_i_HeII);
+  if (print) printf("k_coll_i_HI_HI: %e \n", k_coll_i_HI_HI);
+  if (print) printf("k_coll_i_HI_HeI: %e \n", k_coll_i_HI_HeI);
+  if (print) printf("k_recomb_HII: %e \n", k_recomb_HII);
+  if (print) printf("k_recomb_HeII: %e \n", k_recomb_HeII);
+  if (print) printf("k_recomb_HeIII: %e \n", k_recomb_HeIII);
 }
 
-__device__ int Binary_Search( int N, Real val, float *data, int indx_l, int indx_r ){
+__device__ int Binary_Search(int N, Real val, float *data, int indx_l, int indx_r)
+{
   int n, indx;
-  n = indx_r - indx_l;
-  indx = indx_l + n/2;
-  if ( val >= data[N-1] ) return indx_r;
-  if ( val <= data[0]   ) return indx_l;
-  if ( indx_r == indx_l + 1 ) return indx_l;
-  if ( data[indx] <= val ) indx_l = indx;
-  else indx_r = indx;
-  return Binary_Search( N, val, data, indx_l, indx_r );
+  n    = indx_r - indx_l;
+  indx = indx_l + n / 2;
+  if (val >= data[N - 1]) return indx_r;
+  if (val <= data[0]) return indx_l;
+  if (indx_r == indx_l + 1) return indx_l;
+  if (data[indx] <= val)
+    indx_l = indx;
+  else
+    indx_r = indx;
+  return Binary_Search(N, val, data, indx_l, indx_r);
 }
 
-__device__ Real linear_interpolation( Real delta_x, int indx_l, int indx_r, float*array ){
-  float v_l, v_r; 
+__device__ Real linear_interpolation(Real delta_x, int indx_l, int indx_r, float *array)
+{
+  float v_l, v_r;
   Real v;
   v_l = array[indx_l];
   v_r = array[indx_r];
-  v = delta_x * ( v_r - v_l ) + v_l;
-  return v; 
+  v   = delta_x * (v_r - v_l) + v_l;
+  return v;
 }
 
-__device__ void Get_Current_UVB_Rates( Real current_z, Chemistry_Header &Chem_H,
-                                       float &photo_i_HI, float &photo_i_HeI, float &photo_i_HeII, 
-                                       float &photo_h_HI, float &photo_h_HeI, float &photo_h_HeII, bool print ){
-    
-  if ( current_z > Chem_H.uvb_rates_redshift_d[Chem_H.n_uvb_rates_samples - 1]){
-    photo_h_HI   = 0;  
-    photo_h_HeI  = 0;  
-    photo_h_HeII = 0;  
-    photo_i_HI   = 0;  
-    photo_i_HeI  = 0;  
-    photo_i_HeII = 0;  
+__device__ void Get_Current_UVB_Rates(Real current_z, Chemistry_Header &Chem_H, float &photo_i_HI, float &photo_i_HeI,
+                                      float &photo_i_HeII, float &photo_h_HI, float &photo_h_HeI, float &photo_h_HeII,
+                                      bool print)
+{
+  if (current_z > Chem_H.uvb_rates_redshift_d[Chem_H.n_uvb_rates_samples - 1]) {
+    photo_h_HI   = 0;
+    photo_h_HeI  = 0;
+    photo_h_HeII = 0;
+    photo_i_HI   = 0;
+    photo_i_HeI  = 0;
+    photo_i_HeII = 0;
     return;
-    
-  }  
+  }
   // Find closest value of z in rates_z such that z<=current_z
   int indx_l;
   Real z_l, z_r, delta_x;
-  indx_l = Binary_Search( Chem_H.n_uvb_rates_samples, current_z, Chem_H.uvb_rates_redshift_d, 0, Chem_H.n_uvb_rates_samples-1 );
-  z_l = Chem_H.uvb_rates_redshift_d[indx_l];
-  z_r = Chem_H.uvb_rates_redshift_d[indx_l+1];
-  delta_x = (current_z - z_l) / ( z_r - z_l );
-  
-  photo_i_HI   = linear_interpolation( delta_x, indx_l, indx_l+1, Chem_H.photo_ion_HI_rate_d );
-  photo_i_HeI  = linear_interpolation( delta_x, indx_l, indx_l+1, Chem_H.photo_ion_HeI_rate_d );
-  photo_i_HeII = linear_interpolation( delta_x, indx_l, indx_l+1, Chem_H.photo_ion_HeII_rate_d );
-  photo_h_HI   = linear_interpolation( delta_x, indx_l, indx_l+1, Chem_H.photo_heat_HI_rate_d );
-  photo_h_HeI  = linear_interpolation( delta_x, indx_l, indx_l+1, Chem_H.photo_heat_HeI_rate_d );
-  photo_h_HeII = linear_interpolation( delta_x, indx_l, indx_l+1, Chem_H.photo_heat_HeII_rate_d );
-    
-}
+  indx_l  = Binary_Search(Chem_H.n_uvb_rates_samples, current_z, Chem_H.uvb_rates_redshift_d, 0,
+                          Chem_H.n_uvb_rates_samples - 1);
+  z_l     = Chem_H.uvb_rates_redshift_d[indx_l];
+  z_r     = Chem_H.uvb_rates_redshift_d[indx_l + 1];
+  delta_x = (current_z - z_l) / (z_r - z_l);
+
+  photo_i_HI   = linear_interpolation(delta_x, indx_l, indx_l + 1, Chem_H.photo_ion_HI_rate_d);
+  photo_i_HeI  = linear_interpolation(delta_x, indx_l, indx_l + 1, Chem_H.photo_ion_HeI_rate_d);
+  photo_i_HeII = linear_interpolation(delta_x, indx_l, indx_l + 1, Chem_H.photo_ion_HeII_rate_d);
+  photo_h_HI   = linear_interpolation(delta_x, indx_l, indx_l + 1, Chem_H.photo_heat_HI_rate_d);
+  photo_h_HeI  = linear_interpolation(delta_x, indx_l, indx_l + 1, Chem_H.photo_heat_HeI_rate_d);
+  photo_h_HeII = linear_interpolation(delta_x, indx_l, indx_l + 1, Chem_H.photo_heat_HeII_rate_d);
+}
+
+__device__ Real Get_Chemistry_dt(Thermal_State &TS, Chemistry_Header &Chem_H, Real &HI_dot, Real &e_dot, Real U_dot,
+                                 Real k_coll_i_HI, Real k_coll_i_HeI, Real k_coll_i_HeII, Real k_coll_i_HI_HI,
+                                 Real k_coll_i_HI_HeI, Real k_recomb_HII, Real k_recomb_HeII, Real k_recomb_HeIII,
+                                 float photo_i_HI, float photo_i_HeI, float photo_i_HeII, int n_iter, Real HI_dot_prev,
+                                 Real e_dot_prev, Real t_chem, Real dt_hydro, bool print)
+{
+  Real dt, energy;
+  // Rate of change of HI
+  HI_dot = k_recomb_HII * TS.d_HII * TS.d_e - k_coll_i_HI * TS.d_HI * TS.d_e - k_coll_i_HI_HI * TS.d_HI * TS.d_HI -
+           k_coll_i_HI_HeI * TS.d_HI * TS.d_HeI / 4.0 - photo_i_HI * TS.d_HI;
+
+  // Rate of change of electron
+  e_dot = k_coll_i_HI * TS.d_HI * TS.d_e + k_coll_i_HeI * TS.d_HeI / 4.0 * TS.d_e +
+          k_coll_i_HeII * TS.d_HeII / 4.0 * TS.d_e + k_coll_i_HI_HI * TS.d_HI * TS.d_HI +
+          +k_coll_i_HI_HeI * TS.d_HI * TS.d_HeI / 4.0 - k_recomb_HII * TS.d_HII * TS.d_e -
+          k_recomb_HeII * TS.d_HeII / 4.0 * TS.d_e - k_recomb_HeIII * TS.d_HeIII / 4.0 * TS.d_e + photo_i_HI * TS.d_HI +
+          photo_i_HeI * TS.d_HeI / 4.0 + photo_i_HeII * TS.d_HeII / 4.0;
 
-__device__ Real Get_Chemistry_dt( Thermal_State &TS, Chemistry_Header &Chem_H, Real &HI_dot, Real &e_dot, Real U_dot, 
-                             Real k_coll_i_HI, Real k_coll_i_HeI, Real  k_coll_i_HeII, Real k_coll_i_HI_HI, Real k_coll_i_HI_HeI,
-                             Real k_recomb_HII, Real k_recomb_HeII, Real k_recomb_HeIII,
-                             float photo_i_HI, float photo_i_HeI, float photo_i_HeII, 
-                             int n_iter, Real HI_dot_prev, Real e_dot_prev, 
-                             Real t_chem, Real dt_hydro, bool print  ){
-                               
- Real dt, energy;
-  // Rate of change of HI 
-  HI_dot = k_recomb_HII * TS.d_HII * TS.d_e - k_coll_i_HI * TS.d_HI * TS.d_e
-           - k_coll_i_HI_HI * TS.d_HI * TS.d_HI - k_coll_i_HI_HeI * TS.d_HI * TS.d_HeI/4.0
-           - photo_i_HI * TS.d_HI;
-  
-  // Rate of change of electron 
-  e_dot = k_coll_i_HI * TS.d_HI * TS.d_e + k_coll_i_HeI * TS.d_HeI/4.0 * TS.d_e  + k_coll_i_HeII * TS.d_HeII/4.0 * TS.d_e
-          + k_coll_i_HI_HI * TS.d_HI * TS.d_HI + + k_coll_i_HI_HeI * TS.d_HI * TS.d_HeI/4.0 
-          - k_recomb_HII * TS.d_HII * TS.d_e - k_recomb_HeII * TS.d_HeII/4.0 * TS.d_e - k_recomb_HeIII * TS.d_HeIII/4.0 * TS.d_e 
-          + photo_i_HI * TS.d_HI + photo_i_HeI * TS.d_HeI/4.0 + photo_i_HeII * TS.d_HeII/4.0;           
-  
   // Bound from below to prevent numerical errors
-  if ( fabs(HI_dot) < tiny ) HI_dot = fmin( tiny, TS.d_HI );
-  if ( fabs(e_dot)  < tiny ) e_dot  = fmin( tiny, TS.d_e );
-  
+  if (fabs(HI_dot) < tiny) HI_dot = fmin(tiny, TS.d_HI);
+  if (fabs(e_dot) < tiny) e_dot = fmin(tiny, TS.d_e);
+
   // If the net rate is almost perfectly balanced then set
   // it to zero (since it is zero to available precision)
-  if ( fmin( fabs(k_coll_i_HI * TS.d_HI * TS.d_e), fabs(k_recomb_HII * TS.d_HII * TS.d_e)  ) / fmax( fabs(HI_dot), fabs(e_dot) ) > 1e6 ){
+  if (fmin(fabs(k_coll_i_HI * TS.d_HI * TS.d_e), fabs(k_recomb_HII * TS.d_HII * TS.d_e)) /
+          fmax(fabs(HI_dot), fabs(e_dot)) >
+      1e6) {
     HI_dot = tiny;
     e_dot  = tiny;
   }
-  
-  if ( n_iter > 50 ){
-    HI_dot = fmin( fabs(HI_dot), fabs( HI_dot_prev) );
-    e_dot  = fmin( fabs(e_dot),  fabs( e_dot_prev) );
+
+  if (n_iter > 50) {
+    HI_dot = fmin(fabs(HI_dot), fabs(HI_dot_prev));
+    e_dot  = fmin(fabs(e_dot), fabs(e_dot_prev));
   }
-  
-  if ( TS.d * Chem_H.dens_number_conv > 1e8 && U_dot > 0 ){
-    printf( "#### Equlibrium  \n" );
+
+  if (TS.d * Chem_H.dens_number_conv > 1e8 && U_dot > 0) {
+    printf("#### Equlibrium  \n");
   }
-  
+
   #ifdef TEMPERATURE_FLOOR
-  if ( TS.get_temperature( Chem_H.gamma ) < TEMP_FLOOR )  TS.U = TS.compute_U( TEMP_FLOOR, Chem_H.gamma ); 
+  if (TS.get_temperature(Chem_H.gamma) < TEMP_FLOOR) TS.U = TS.compute_U(TEMP_FLOOR, Chem_H.gamma);
   #endif
-  
-  energy = fmax( TS.U * TS.d, tiny );
-  dt = fmin( fabs( 0.1 * TS.d_HI / HI_dot ), fabs( 0.1 * TS.d_e / e_dot )  );
-  dt = fmin( fabs( 0.1 * energy / U_dot ), dt  );
-  dt = fmin( 0.5 * dt_hydro, dt );
-  dt = fmin( dt_hydro - t_chem, dt );
-  
-  if ( n_iter == Chem_H.max_iter-1 ){
-    printf("##### Chem_GPU: dt_hydro: %e   t_chem: %e   dens: %e   temp: %e  GE: %e  U_dot: %e   dt_HI: %e   dt_e: %e   dt_U: %e \n", dt_hydro,  t_chem, TS.d, TS.get_temperature(Chem_H.gamma), energy, U_dot, fabs( 0.1 * TS.d_HI / HI_dot ), fabs( 0.1 * TS.d_e / e_dot ), fabs( 0.1 * TS.U * TS.d / U_dot )   ) ;
+
+  energy = fmax(TS.U * TS.d, tiny);
+  dt     = fmin(fabs(0.1 * TS.d_HI / HI_dot), fabs(0.1 * TS.d_e / e_dot));
+  dt     = fmin(fabs(0.1 * energy / U_dot), dt);
+  dt     = fmin(0.5 * dt_hydro, dt);
+  dt     = fmin(dt_hydro - t_chem, dt);
+
+  if (n_iter == Chem_H.max_iter - 1) {
+    printf(
+        "##### Chem_GPU: dt_hydro: %e   t_chem: %e   dens: %e   temp: %e  GE: "
+        "%e  U_dot: %e   dt_HI: %e   dt_e: %e   dt_U: %e \n",
+        dt_hydro, t_chem, TS.d, TS.get_temperature(Chem_H.gamma), energy, U_dot, fabs(0.1 * TS.d_HI / HI_dot),
+        fabs(0.1 * TS.d_e / e_dot), fabs(0.1 * TS.U * TS.d / U_dot));
   }
-  
-  
-  if (print) printf("HIdot: %e\n", HI_dot ); 
-  if (print) printf("edot: %e\n", e_dot );     
-  if (print) printf("energy: %e\n", TS.U * TS.d );     
-  if (print) printf("Udot: %e\n", U_dot );     
-  if (print) printf("dt_hydro: %e\n", dt_hydro );
-  if (print) printf("dt: %e\n", dt );     
-  
-  return dt;                                      
-  
+
+  if (print) printf("HIdot: %e\n", HI_dot);
+  if (print) printf("edot: %e\n", e_dot);
+  if (print) printf("energy: %e\n", TS.U * TS.d);
+  if (print) printf("Udot: %e\n", U_dot);
+  if (print) printf("dt_hydro: %e\n", dt_hydro);
+  if (print) printf("dt: %e\n", dt);
+
+  return dt;
 }
 
-__device__ void Update_Step( Thermal_State &TS, Chemistry_Header &Chem_H, Real dt, Real U_dot, Real k_coll_i_HI, Real k_coll_i_HeI, 
-                             Real k_coll_i_HeII, Real k_coll_i_HI_HI, Real k_coll_i_HI_HeI,
-                             Real k_recomb_HII, Real k_recomb_HeII, Real k_recomb_HeIII, 
-                             float photo_i_HI, float photo_i_HeI, float photo_i_HeII, 
-                             Real &HI_dot_prev, Real &e_dot_prev, Real &temp_prev, bool print ){
-               
-  Real d_HI_p, d_HII_p, d_HeI_p, d_HeII_p, d_HeIII_p,  d_e_p;
+__device__ void Update_Step(Thermal_State &TS, Chemistry_Header &Chem_H, Real dt, Real U_dot, Real k_coll_i_HI,
+                            Real k_coll_i_HeI, Real k_coll_i_HeII, Real k_coll_i_HI_HI, Real k_coll_i_HI_HeI,
+                            Real k_recomb_HII, Real k_recomb_HeII, Real k_recomb_HeIII, float photo_i_HI,
+                            float photo_i_HeI, float photo_i_HeII, Real &HI_dot_prev, Real &e_dot_prev, Real &temp_prev,
+                            bool print)
+{
+  Real d_HI_p, d_HII_p, d_HeI_p, d_HeII_p, d_HeIII_p, d_e_p;
   Real s_coef, a_coef;
-  
+
   // Update HI
   s_coef = k_recomb_HII * TS.d_HII * TS.d_e;
-  a_coef = k_coll_i_HI * TS.d_e + k_coll_i_HI_HI * TS.d_HI + k_coll_i_HI_HeI * TS.d_HeI/4.0 + photo_i_HI;
-  d_HI_p = ( dt * s_coef + TS.d_HI ) / ( 1.0 + dt*a_coef ); 
-  if ( print ) printf("Update HI  s_coef: %e    a_coef: %e   HIp: %e \n", s_coef, a_coef, d_HI_p );
-  
+  a_coef = k_coll_i_HI * TS.d_e + k_coll_i_HI_HI * TS.d_HI + k_coll_i_HI_HeI * TS.d_HeI / 4.0 + photo_i_HI;
+  d_HI_p = (dt * s_coef + TS.d_HI) / (1.0 + dt * a_coef);
+  if (print) printf("Update HI  s_coef: %e    a_coef: %e   HIp: %e \n", s_coef, a_coef, d_HI_p);
+
   // Update HII
-  s_coef = k_coll_i_HI * d_HI_p * TS.d_e + k_coll_i_HI_HI * d_HI_p * d_HI_p + k_coll_i_HI_HeI * d_HI_p * TS.d_HeI/4.0 + photo_i_HI * d_HI_p;
-  a_coef = k_recomb_HII * TS.d_e;
-  d_HII_p = ( dt * s_coef + TS.d_HII ) / ( 1.0 + dt*a_coef ); 
-  if ( print ) printf("Update HII  s_coef: %e    a_coef: %e   HIIp: %e \n", s_coef, a_coef, d_HII_p );
-  
+  s_coef = k_coll_i_HI * d_HI_p * TS.d_e + k_coll_i_HI_HI * d_HI_p * d_HI_p +
+           k_coll_i_HI_HeI * d_HI_p * TS.d_HeI / 4.0 + photo_i_HI * d_HI_p;
+  a_coef  = k_recomb_HII * TS.d_e;
+  d_HII_p = (dt * s_coef + TS.d_HII) / (1.0 + dt * a_coef);
+  if (print) printf("Update HII  s_coef: %e    a_coef: %e   HIIp: %e \n", s_coef, a_coef, d_HII_p);
+
   // Update electron
-  s_coef = k_coll_i_HI_HI * d_HI_p * d_HI_p + k_coll_i_HI_HeI * d_HI_p * TS.d_HeI/4.0 
-           + photo_i_HI * TS.d_HI + photo_i_HeI * TS.d_HeI/4.0 + photo_i_HeII * TS.d_HeII/4.0  ;
-  a_coef = - k_coll_i_HI * TS.d_HI  + k_recomb_HII * TS.d_HII - k_coll_i_HeI * TS.d_HeI/4.0 + k_recomb_HeII * TS.d_HeII/4.0
-           - k_coll_i_HeII * TS.d_HeII/4.0 + k_recomb_HeIII * TS.d_HeIII/4.0;
-  d_e_p = ( dt * s_coef + TS.d_e ) / ( 1.0 + dt*a_coef ); 
-  if ( print ) printf("Update e  s_coef: %e    a_coef: %e   ep: %e \n", s_coef, a_coef, d_e_p );
-  
+  s_coef = k_coll_i_HI_HI * d_HI_p * d_HI_p + k_coll_i_HI_HeI * d_HI_p * TS.d_HeI / 4.0 + photo_i_HI * TS.d_HI +
+           photo_i_HeI * TS.d_HeI / 4.0 + photo_i_HeII * TS.d_HeII / 4.0;
+  a_coef = -k_coll_i_HI * TS.d_HI + k_recomb_HII * TS.d_HII - k_coll_i_HeI * TS.d_HeI / 4.0 +
+           k_recomb_HeII * TS.d_HeII / 4.0 - k_coll_i_HeII * TS.d_HeII / 4.0 + k_recomb_HeIII * TS.d_HeIII / 4.0;
+  d_e_p = (dt * s_coef + TS.d_e) / (1.0 + dt * a_coef);
+  if (print) printf("Update e  s_coef: %e    a_coef: %e   ep: %e \n", s_coef, a_coef, d_e_p);
+
   // Update HeI
-  s_coef = k_recomb_HeII * TS.d_HeII * TS.d_e;
-  a_coef = k_coll_i_HeI * TS.d_e + photo_i_HeI;
-  d_HeI_p = ( dt * s_coef + TS.d_HeI ) / ( 1.0 + dt*a_coef ); 
-  if ( print ) printf("Update HeI  s_coef: %e    a_coef: %e   HeIp: %e \n", s_coef, a_coef, d_HeI_p );
-  
+  s_coef  = k_recomb_HeII * TS.d_HeII * TS.d_e;
+  a_coef  = k_coll_i_HeI * TS.d_e + photo_i_HeI;
+  d_HeI_p = (dt * s_coef + TS.d_HeI) / (1.0 + dt * a_coef);
+  if (print) printf("Update HeI  s_coef: %e    a_coef: %e   HeIp: %e \n", s_coef, a_coef, d_HeI_p);
+
   // Update HeII
-  s_coef = k_coll_i_HeI * d_HeI_p * TS.d_e + k_recomb_HeIII * TS.d_HeIII * TS.d_e + photo_i_HeI * d_HeI_p;
-  a_coef = k_recomb_HeII * TS.d_e + k_coll_i_HeII * TS.d_e + photo_i_HeII;
-  d_HeII_p = ( dt * s_coef + TS.d_HeII ) / ( 1.0 + dt*a_coef ); 
-  if ( print ) printf("Update HeII  s_coef: %e    a_coef: %e   HeIIp: %e \n", s_coef, a_coef, d_HeII_p );
-  
+  s_coef   = k_coll_i_HeI * d_HeI_p * TS.d_e + k_recomb_HeIII * TS.d_HeIII * TS.d_e + photo_i_HeI * d_HeI_p;
+  a_coef   = k_recomb_HeII * TS.d_e + k_coll_i_HeII * TS.d_e + photo_i_HeII;
+  d_HeII_p = (dt * s_coef + TS.d_HeII) / (1.0 + dt * a_coef);
+  if (print) printf("Update HeII  s_coef: %e    a_coef: %e   HeIIp: %e \n", s_coef, a_coef, d_HeII_p);
+
   // Update HeIII
-  s_coef = k_coll_i_HeII * d_HeII_p * TS.d_e + photo_i_HeII * d_HeII_p;
-  a_coef = k_recomb_HeIII * TS.d_e;
-  d_HeIII_p = ( dt * s_coef + TS.d_HeIII ) / ( 1.0 + dt*a_coef ); 
-  if ( print ) printf("Update HeIII  s_coef: %e    a_coef: %e   HeIIIp: %e \n", s_coef, a_coef, d_HeIII_p );
-  
+  s_coef    = k_coll_i_HeII * d_HeII_p * TS.d_e + photo_i_HeII * d_HeII_p;
+  a_coef    = k_recomb_HeIII * TS.d_e;
+  d_HeIII_p = (dt * s_coef + TS.d_HeIII) / (1.0 + dt * a_coef);
+  if (print) printf("Update HeIII  s_coef: %e    a_coef: %e   HeIIIp: %e \n", s_coef, a_coef, d_HeIII_p);
+
   // Record the temperature for the next step
-  temp_prev = TS.get_temperature( Chem_H.gamma ); 
-  
-  HI_dot_prev = fabs( TS.d_HI - d_HI_p ) / fmax( dt, tiny );
-  TS.d_HI    = fmax( d_HI_p, tiny );
-  TS.d_HII   = fmax( d_HII_p, tiny );
-  TS.d_HeI   = fmax( d_HeI_p, tiny );
-  TS.d_HeII  = fmax( d_HeII_p, tiny );
-  TS.d_HeIII = fmax( d_HeIII_p, 1e-5*tiny );
-  
+  temp_prev = TS.get_temperature(Chem_H.gamma);
+
+  HI_dot_prev = fabs(TS.d_HI - d_HI_p) / fmax(dt, tiny);
+  TS.d_HI     = fmax(d_HI_p, tiny);
+  TS.d_HII    = fmax(d_HII_p, tiny);
+  TS.d_HeI    = fmax(d_HeI_p, tiny);
+  TS.d_HeII   = fmax(d_HeII_p, tiny);
+  TS.d_HeIII  = fmax(d_HeIII_p, 1e-5 * tiny);
+
   // Use charge conservation to determine electron fraction
   e_dot_prev = TS.d_e;
-  TS.d_e = TS.d_HII + TS.d_HeII/4.0 + TS.d_HeIII/2.0;
-  e_dot_prev = fabs( TS.d_e - e_dot_prev ) / fmax( dt, tiny );
-  
+  TS.d_e     = TS.d_HII + TS.d_HeII / 4.0 + TS.d_HeIII / 2.0;
+  e_dot_prev = fabs(TS.d_e - e_dot_prev) / fmax(dt, tiny);
+
   // Update internal energy
   TS.U += U_dot / TS.d * dt;
   #ifdef TEMPERATURE_FLOOR
-  if ( TS.get_temperature( Chem_H.gamma ) < TEMP_FLOOR )  TS.U = TS.compute_U( TEMP_FLOOR, Chem_H.gamma ); 
+  if (TS.get_temperature(Chem_H.gamma) < TEMP_FLOOR) TS.U = TS.compute_U(TEMP_FLOOR, Chem_H.gamma);
   #endif
-  if ( print ) printf("Updated U: %e \n", TS.U);  
-  
-
- }
-
+  if (print) printf("Updated U: %e \n", TS.U);
+}
 
-__global__ void Update_Chemistry_kernel( Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt_hydro, Chemistry_Header Chem_H   ){
-  
-    
+__global__ void Update_Chemistry_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields,
+                                        Real dt_hydro, Chemistry_Header Chem_H)
+{
   int id, xid, yid, zid, n_cells, n_iter;
   Real d, d_inv, vx, vy, vz;
   Real GE, E_kin, dt_chem, t_chem;
   Real current_a, a3, a2;
-  
+
   Real current_z, density_conv, energy_conv;
-  current_z = Chem_H.current_z;
+  current_z    = Chem_H.current_z;
   density_conv = Chem_H.density_conversion;
   energy_conv  = Chem_H.energy_conversion;
-  
+
   Real U_dot, HI_dot, e_dot, HI_dot_prev, e_dot_prev, temp_prev;
   Real k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI, k_coll_i_HI_HeI;
   Real k_recomb_HII, k_recomb_HeII, k_recomb_HeIII;
   float photo_i_HI, photo_i_HeI, photo_i_HeII;
   float photo_h_HI, photo_h_HeI, photo_h_HeII;
   Real correct_H, correct_He;
-  
-  
-  n_cells = nx*ny*nz;
-  
+
+  n_cells = nx * ny * nz;
+
   // get a global thread ID
-  id = threadIdx.x + blockIdx.x * blockDim.x;
-  zid = id / (nx*ny);
-  yid = (id - zid*nx*ny) / nx;
-  xid = id - zid*nx*ny - yid*nx;
+  id  = threadIdx.x + blockIdx.x * blockDim.x;
+  zid = id / (nx * ny);
+  yid = (id - zid * nx * ny) / nx;
+  xid = id - zid * nx * ny - yid * nx;
   bool print;
-  
+
   // threads corresponding to real cells do the calculation
-  if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost)
-  {
-    d  =  dev_conserved[            id];
+  if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 &&
+      zid < nz - n_ghost) {
+    d     = dev_conserved[id];
     d_inv = 1.0 / d;
-    vx =  dev_conserved[1*n_cells + id] * d_inv;
-    vy =  dev_conserved[2*n_cells + id] * d_inv;
-    vz =  dev_conserved[3*n_cells + id] * d_inv;
-    E_kin = 0.5*d*(vx*vx + vy*vy + vz*vz);
-    #ifdef DE
-    GE = dev_conserved[(n_fields-1)*n_cells + id];
-    #else 
-    GE  = dev_conserved[4*n_cells + id] - E_kin;
-    #endif
-  
+    vx    = dev_conserved[1 * n_cells + id] * d_inv;
+    vy    = dev_conserved[2 * n_cells + id] * d_inv;
+    vz    = dev_conserved[3 * n_cells + id] * d_inv;
+    E_kin = 0.5 * d * (vx * vx + vy * vy + vz * vz);
+  #ifdef DE
+    GE = dev_conserved[(n_fields - 1) * n_cells + id];
+  #else
+    GE = dev_conserved[4 * n_cells + id] - E_kin;
+  #endif
+
     print = false;
     // if ( xid == n_ghost && yid == n_ghost && zid == n_ghost ) print = true;
-        
+
     // Convert to cgs units
-    current_a = 1 / ( current_z + 1);
-    a2 = current_a * current_a;
-    a3 = a2 * current_a;  
-    d  *= density_conv / a3;
-    GE *= energy_conv  / a2; 
-    dt_hydro = dt_hydro * current_a * current_a / Chem_H.H0 * 1000 * KPC / Chem_H.time_units;
-    // delta_a = Chem_H.H0 * sqrt( Chem_H.Omega_M/current_a + Chem_H.Omega_L*pow(current_a, 2) ) / ( 1000 * KPC ) * dt_hydro * Chem_H.time_units;
-        
-    // Initialize the thermal state
-    Thermal_State TS;    
-    TS.d       = dev_conserved[            id]  / a3;
-    TS.d_HI    = dev_conserved[ 5*n_cells + id] / a3; 
-    TS.d_HII   = dev_conserved[ 6*n_cells + id] / a3; 
-    TS.d_HeI   = dev_conserved[ 7*n_cells + id] / a3; 
-    TS.d_HeII  = dev_conserved[ 8*n_cells + id] / a3; 
-    TS.d_HeIII = dev_conserved[ 9*n_cells + id] / a3; 
-    TS.d_e     = dev_conserved[10*n_cells + id] / a3; 
-    TS.U       = GE * d_inv * 1e-10; 
-    
+    current_a = 1 / (current_z + 1);
+    a2        = current_a * current_a;
+    a3        = a2 * current_a;
+    d *= density_conv / a3;
+    GE *= energy_conv / a2;
+    dt_hydro = dt_hydro / Chem_H.time_units;
+
+  #ifdef COSMOLOGY
+    dt_hydro *= current_a * current_a / Chem_H.H0 * 1000 *
+                KPC
+  #endif  // COSMOLOGY
+          // dt_hydro = dt_hydro * current_a * current_a / Chem_H.H0 *
+          // 1000 * KPC / Chem_H.time_units;
+          //  delta_a = Chem_H.H0 * sqrt( Chem_H.Omega_M/current_a +
+          //  Chem_H.Omega_L*pow(current_a, 2) ) / ( 1000 * KPC ) *
+          //  dt_hydro * Chem_H.time_units;
+
+                    // Initialize the thermal state
+                    Thermal_State TS;
+    TS.d       = dev_conserved[id] / a3;
+    TS.d_HI    = dev_conserved[id + n_cells * grid_enum::HI_density] / a3;
+    TS.d_HII   = dev_conserved[id + n_cells * grid_enum::HII_density] / a3;
+    TS.d_HeI   = dev_conserved[id + n_cells * grid_enum::HeI_density] / a3;
+    TS.d_HeII  = dev_conserved[id + n_cells * grid_enum::HeII_density] / a3;
+    TS.d_HeIII = dev_conserved[id + n_cells * grid_enum::HeIII_density] / a3;
+    TS.d_e     = dev_conserved[id + n_cells * grid_enum::e_density] / a3;
+    TS.U       = GE * d_inv * 1e-10;
+
     // Ceiling species
-    TS.d_HI    = fmax( TS.d_HI,    tiny );
-    TS.d_HII   = fmax( TS.d_HII,   tiny );
-    TS.d_HeI   = fmax( TS.d_HeI,   tiny );
-    TS.d_HeII  = fmax( TS.d_HeII,  tiny );
-    TS.d_HeIII = fmax( TS.d_HeIII, 1e-5*tiny );
-    TS.d_e     = fmax( TS.d_e,     tiny );
-    
+    TS.d_HI    = fmax(TS.d_HI, tiny);
+    TS.d_HII   = fmax(TS.d_HII, tiny);
+    TS.d_HeI   = fmax(TS.d_HeI, tiny);
+    TS.d_HeII  = fmax(TS.d_HeII, tiny);
+    TS.d_HeIII = fmax(TS.d_HeIII, 1e-5 * tiny);
+    TS.d_e     = fmax(TS.d_e, tiny);
+
     // Compute temperature at first iteration
-    temp_prev = TS.get_temperature( Chem_H.gamma );
-    
+    temp_prev = TS.get_temperature(Chem_H.gamma);
+
     // if (print){
     //   printf("current_z: %f\n", current_z );
     //   printf("density_units: %e\n", Chem_H.density_units );
@@ -510,551 +531,486 @@ __global__ void Update_Chemistry_kernel( Real *dev_conserved, int nx, int ny, in
     //   printf("energy: %e \n", TS.U*TS.d );
     //   printf("dt_hydro: %e \n", dt_hydro / Chem_H.time_units );
     // }
-    
+
     // Get the photoheating and photoionization rates at z=current_z
-    Get_Current_UVB_Rates( current_z, Chem_H, photo_i_HI, photo_i_HeI, photo_i_HeII, 
-      photo_h_HI, photo_h_HeI, photo_h_HeII, print );
-    
+    Get_Current_UVB_Rates(current_z, Chem_H, photo_i_HI, photo_i_HeI, photo_i_HeII, photo_h_HI, photo_h_HeI,
+                          photo_h_HeII, print);
+
     HI_dot_prev = 0;
-    e_dot_prev  = 0; 
-    n_iter = 0;
-    t_chem = 0;
-    while ( t_chem < dt_hydro ){
-      
-      if (print) printf("########################################## Iter %d \n", n_iter );
-          
-      U_dot = Get_Cooling_Rates( TS, Chem_H, Chem_H.dens_number_conv, current_z, temp_prev, 
-                                 photo_h_HI, photo_h_HeI, photo_h_HeII, print );
-      
-      Get_Reaction_Rates( TS, Chem_H, k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII,
-                          k_coll_i_HI_HI, k_coll_i_HI_HeI, k_recomb_HII, k_recomb_HeII, k_recomb_HeIII, print  );
-      
-      dt_chem = Get_Chemistry_dt( TS, Chem_H, HI_dot, e_dot, U_dot, 
-                        k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI, k_coll_i_HI_HeI,
-                        k_recomb_HII, k_recomb_HeII, k_recomb_HeIII,
-                        photo_i_HI, photo_i_HeI, photo_i_HeII, 
-                        n_iter, HI_dot_prev, e_dot_prev, t_chem, dt_hydro, print  );
-                        
-      Update_Step( TS, Chem_H, dt_chem, U_dot, k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI, k_coll_i_HI_HeI,
-                   k_recomb_HII, k_recomb_HeII, k_recomb_HeIII, photo_i_HI, photo_i_HeI, photo_i_HeII, HI_dot_prev, 
-                   e_dot_prev, temp_prev, print );
-                
+    e_dot_prev  = 0;
+    n_iter      = 0;
+    t_chem      = 0;
+    while (t_chem < dt_hydro) {
+      if (print) printf("########################################## Iter %d \n", n_iter);
+
+      U_dot = Get_Cooling_Rates(TS, Chem_H, Chem_H.dens_number_conv, current_z, temp_prev, photo_h_HI, photo_h_HeI,
+                                photo_h_HeII, print);
+
+      Get_Reaction_Rates(TS, Chem_H, k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI, k_coll_i_HI_HeI,
+                         k_recomb_HII, k_recomb_HeII, k_recomb_HeIII, print);
+
+      dt_chem =
+          Get_Chemistry_dt(TS, Chem_H, HI_dot, e_dot, U_dot, k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI,
+                           k_coll_i_HI_HeI, k_recomb_HII, k_recomb_HeII, k_recomb_HeIII, photo_i_HI, photo_i_HeI,
+                           photo_i_HeII, n_iter, HI_dot_prev, e_dot_prev, t_chem, dt_hydro, print);
+
+      Update_Step(TS, Chem_H, dt_chem, U_dot, k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI, k_coll_i_HI_HeI,
+                  k_recomb_HII, k_recomb_HeII, k_recomb_HeIII, photo_i_HI, photo_i_HeI, photo_i_HeII, HI_dot_prev,
+                  e_dot_prev, temp_prev, print);
+
       t_chem += dt_chem;
       n_iter += 1;
-      if ( n_iter == Chem_H.max_iter ) break;
-          
+      if (n_iter == Chem_H.max_iter) break;
     }
-    if ( print ) printf("Chem_GPU: N Iter:  %d\n", n_iter );      
-    
+    if (print) printf("Chem_GPU: N Iter:  %d\n", n_iter);
+
     // Make consistent abundances with the H and He density
-    correct_H = Chem_H.H_fraction * TS.d / ( TS.d_HI + TS.d_HII );
-    correct_He = ( 1.0 - Chem_H.H_fraction ) * TS.d / ( TS.d_HeI + TS.d_HeII + TS.d_HeIII );
-    TS.d_HI    *= correct_H;
-    TS.d_HII   *= correct_H;
-    TS.d_HeI   *= correct_He;
-    TS.d_HeII  *= correct_He;
+    correct_H  = Chem_H.H_fraction * TS.d / (TS.d_HI + TS.d_HII);
+    correct_He = (1.0 - Chem_H.H_fraction) * TS.d / (TS.d_HeI + TS.d_HeII + TS.d_HeIII);
+    TS.d_HI *= correct_H;
+    TS.d_HII *= correct_H;
+    TS.d_HeI *= correct_He;
+    TS.d_HeII *= correct_He;
     TS.d_HeIII *= correct_He;
-    
+
     // Use charge conservation to determine electron fractioan
-    TS.d_e = TS.d_HII + TS.d_HeII/4.0 + TS.d_HeIII/2.0;
-       
+    TS.d_e = TS.d_HII + TS.d_HeII / 4.0 + TS.d_HeIII / 2.0;
+
     // Write the Updated Thermal State
-    dev_conserved[ 5*n_cells + id] = TS.d_HI    * a3; 
-    dev_conserved[ 6*n_cells + id] = TS.d_HII   * a3; 
-    dev_conserved[ 7*n_cells + id] = TS.d_HeI   * a3; 
-    dev_conserved[ 8*n_cells + id] = TS.d_HeII  * a3; 
-    dev_conserved[ 9*n_cells + id] = TS.d_HeIII * a3; 
-    dev_conserved[10*n_cells + id] = TS.d_e     * a3; 
-    d = d / density_conv * a3;
-    GE = TS.U / d_inv / energy_conv * a2 / 1e-10;
-    dev_conserved[4*n_cells + id]  = GE + E_kin;  
-    #ifdef DE
-    dev_conserved[(n_fields-1)*n_cells + id] = GE;
-    #endif
-    
-    if ( print ) printf("###########################################\n" );
-    if ( print ) printf("Updated HI:  %e\n",    TS.d_HI    * a3  );
-    if ( print ) printf("Updated HII:  %e\n",   TS.d_HII   * a3  );
-    if ( print ) printf("Updated HeI:  %e\n",   TS.d_HeI   * a3  );
-    if ( print ) printf("Updated HeII:  %e\n",  TS.d_HeII  * a3  );
-    if ( print ) printf("Updated HeIII:  %e\n", TS.d_HeIII * a3  );    
-    if ( print ) printf("Updated e:  %e\n",     TS.d_e     * a3  );
-    if ( print ) printf("Updated GE:  %e\n", dev_conserved[(n_fields-1)*n_cells + id]  );
-    if ( print ) printf("Updated E:   %e\n", dev_conserved[4*n_cells + id]  );
-    
+    dev_conserved[id + n_cells * grid_enum::HI_density]    = TS.d_HI * a3;
+    dev_conserved[id + n_cells * grid_enum::HII_density]   = TS.d_HII * a3;
+    dev_conserved[id + n_cells * grid_enum::HeI_density]   = TS.d_HeI * a3;
+    dev_conserved[id + n_cells * grid_enum::HeII_density]  = TS.d_HeII * a3;
+    dev_conserved[id + n_cells * grid_enum::HeIII_density] = TS.d_HeIII * a3;
+    dev_conserved[id + n_cells * grid_enum::e_density]     = TS.d_e * a3;
+    d                                                      = d / density_conv * a3;
+    GE                                                     = TS.U / d_inv / energy_conv * a2 / 1e-10;
+    dev_conserved[4 * n_cells + id]                        = GE + E_kin;
+  #ifdef DE
+    dev_conserved[(n_fields - 1) * n_cells + id] = GE;
+  #endif
+
+    if (print) printf("###########################################\n");
+    if (print) printf("Updated HI:  %e\n", TS.d_HI * a3);
+    if (print) printf("Updated HII:  %e\n", TS.d_HII * a3);
+    if (print) printf("Updated HeI:  %e\n", TS.d_HeI * a3);
+    if (print) printf("Updated HeII:  %e\n", TS.d_HeII * a3);
+    if (print) printf("Updated HeIII:  %e\n", TS.d_HeIII * a3);
+    if (print) printf("Updated e:  %e\n", TS.d_e * a3);
+    if (print) printf("Updated GE:  %e\n", dev_conserved[(n_fields - 1) * n_cells + id]);
+    if (print) printf("Updated E:   %e\n", dev_conserved[4 * n_cells + id]);
   }
 }
 
-void Do_Chemistry_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Chemistry_Header &Chem_H){
-  
+void Do_Chemistry_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt,
+                         Chemistry_Header &Chem_H)
+{
   float time;
   cudaEvent_t start, stop;
   cudaEventCreate(&start);
   cudaEventCreate(&stop);
   cudaEventRecord(start, 0);
-  
-  int ngrid = (nx*ny*nz - 1) / TPB_CHEM + 1;
+
+  int ngrid = (nx * ny * nz - 1) / TPB_CHEM + 1;
   dim3 dim1dGrid(ngrid, 1, 1);
-  dim3 dim1dBlock(TPB_CHEM, 1, 1);                                          
-  hipLaunchKernelGGL(Update_Chemistry_kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, Chem_H );
-  
-  CudaCheckError();
+  dim3 dim1dBlock(TPB_CHEM, 1, 1);
+  hipLaunchKernelGGL(Update_Chemistry_kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields,
+                     dt, Chem_H);
+
+  GPU_Error_Check();
   cudaEventRecord(stop, 0);
   cudaEventSynchronize(stop);
   cudaEventElapsedTime(&time, start, stop);
-  Chem_H.runtime_chemistry_step = (Real) time/1000; // (Convert ms to secs )
-
+  Chem_H.runtime_chemistry_step = (Real)time / 1000;  // (Convert ms to secs )
 }
 
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-// Reaction and cooling rates from Grackle
-
-//Kelvin to eV conversion factor
-#ifndef tevk
-#define tevk 1.1605e4
-#endif
-//Comparison value
-#ifndef dhuge
-#define dhuge 1.0e30
-#endif
-//Small value
-#ifndef tiny
-#define tiny 1.0e-20
-#endif
-// Boltzmann's constant
-#ifndef kboltz
-#define kboltz 1.3806504e-16 //Boltzmann's constant [cm2gs-2K-1] or [ergK-1] 
-#endif
+  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+  // Reaction and cooling rates from Grackle
 
+  // Kelvin to eV conversion factor
+  #ifndef tevk
+    #define tevk 1.1605e4
+  #endif
+  // Comparison value
+  #ifndef dhuge
+    #define dhuge 1.0e30
+  #endif
+  // Small value
+  #ifndef tiny
+    #define tiny 1.0e-20
+  #endif
+  // Boltzmann's constant
+  #ifndef kboltz
+    #define kboltz 1.3806504e-16  // Boltzmann's constant [cm2gs-2K-1] or [ergK-1]
+  #endif
 
 // Calculation of k1 (HI + e --> HII + 2e)
 // k1_rate
-__device__ Real coll_i_HI_rate( Real T, Real units )
-{
-    Real T_ev = T / 11605.0;
-    Real logT_ev = log(T_ev);
-
-    Real k1 = exp( -32.71396786375
-                        + 13.53655609057*logT_ev
-                        - 5.739328757388*pow(logT_ev, 2)
-                        + 1.563154982022*pow(logT_ev, 3)
-                        - 0.2877056004391*pow(logT_ev, 4)
-                        + 0.03482559773736999*pow(logT_ev, 5)
-                        - 0.00263197617559*pow(logT_ev, 6)
-                        + 0.0001119543953861*pow(logT_ev, 7)
-                        - 2.039149852002e-6*pow(logT_ev, 8)) / units;
-    if (T_ev <= 0.8){
-        k1 = fmax(tiny, k1); 
-    }
-    return k1;
+__device__ Real coll_i_HI_rate(Real T, Real units)
+{
+  Real T_ev    = T / 11605.0;
+  Real logT_ev = log(T_ev);
+
+  Real k1 = exp(-32.71396786375 + 13.53655609057 * logT_ev - 5.739328757388 * pow(logT_ev, 2) +
+                1.563154982022 * pow(logT_ev, 3) - 0.2877056004391 * pow(logT_ev, 4) +
+                0.03482559773736999 * pow(logT_ev, 5) - 0.00263197617559 * pow(logT_ev, 6) +
+                0.0001119543953861 * pow(logT_ev, 7) - 2.039149852002e-6 * pow(logT_ev, 8)) /
+            units;
+  if (T_ev <= 0.8) {
+    k1 = fmax(tiny, k1);
+  }
+  return k1;
 }
 
-//Calculation of k3 (HeI + e --> HeII + 2e)
-// k3_rate
-__device__ Real coll_i_HeI_rate( Real T, Real units )
+// Calculation of k3 (HeI + e --> HeII + 2e)
+//  k3_rate
+__device__ Real coll_i_HeI_rate(Real T, Real units)
 {
-    Real T_ev = T / 11605.0;
-    Real logT_ev = log(T_ev);
+  Real T_ev    = T / 11605.0;
+  Real logT_ev = log(T_ev);
 
-    if (T_ev > 0.8){
-        return exp( -44.09864886561001
-                + 23.91596563469*logT_ev
-                - 10.75323019821*pow(logT_ev, 2)
-                + 3.058038757198*pow(logT_ev, 3)
-                - 0.5685118909884001*pow(logT_ev, 4)
-                + 0.06795391233790001*pow(logT_ev, 5)
-                - 0.005009056101857001*pow(logT_ev, 6)
-                + 0.0002067236157507*pow(logT_ev, 7)
-                - 3.649161410833e-6*pow(logT_ev, 8)) / units;
-    } else {
-        return tiny;
-    }
-}   
+  if (T_ev > 0.8) {
+    return exp(-44.09864886561001 + 23.91596563469 * logT_ev - 10.75323019821 * pow(logT_ev, 2) +
+               3.058038757198 * pow(logT_ev, 3) - 0.5685118909884001 * pow(logT_ev, 4) +
+               0.06795391233790001 * pow(logT_ev, 5) - 0.005009056101857001 * pow(logT_ev, 6) +
+               0.0002067236157507 * pow(logT_ev, 7) - 3.649161410833e-6 * pow(logT_ev, 8)) /
+           units;
+  } else {
+    return tiny;
+  }
+}
 
-//Calculation of k4 (HeII + e --> HeI + photon)
-// k4_rate
-__device__ Real recomb_HeII_rate( Real T, Real units, bool use_case_B )
+// Calculation of k4 (HeII + e --> HeI + photon)
+//  k4_rate
+__device__ Real recomb_HeII_rate(Real T, Real units, bool use_case_B)
 {
-    Real T_ev = T / 11605.0;
-    Real logT_ev = log(T_ev);
-    //If case B recombination on.
-    if (use_case_B){
-        return 1.26e-14 * pow(5.7067e5/T, 0.75) / units;
-    }
+  Real T_ev    = T / 11605.0;
+  Real logT_ev = log(T_ev);
+  // If case B recombination on.
+  if (use_case_B) {
+    return 1.26e-14 * pow(5.7067e5 / T, 0.75) / units;
+  }
 
-    //If case B recombination off.
-    if (T_ev > 0.8){
-        return (1.54e-9*(1.0 + 0.3 / exp(8.099328789667/T_ev))
-             / (exp(40.49664394833662/T_ev)*pow(T_ev, 1.5))
-             + 3.92e-13/pow(T_ev, 0.6353)) / units;
-    } else {
-        return 3.92e-13/pow(T_ev, 0.6353) / units;
-    }
+  // If case B recombination off.
+  if (T_ev > 0.8) {
+    return (1.54e-9 * (1.0 + 0.3 / exp(8.099328789667 / T_ev)) / (exp(40.49664394833662 / T_ev) * pow(T_ev, 1.5)) +
+            3.92e-13 / pow(T_ev, 0.6353)) /
+           units;
+  } else {
+    return 3.92e-13 / pow(T_ev, 0.6353) / units;
+  }
 }
 // k4_rate Case A
-__device__ Real recomb_HeII_rate_case_A( Real T, Real units )
-{
-    Real T_ev = T / 11605.0;
-    Real logT_ev = log(T_ev);
-    if (T_ev > 0.8){
-        return (1.54e-9*(1.0 + 0.3 / exp(8.099328789667/T_ev))
-             / (exp(40.49664394833662/T_ev)*pow(T_ev, 1.5))
-             + 3.92e-13/pow(T_ev, 0.6353)) / units;
-    } else {
-        return 3.92e-13/pow(T_ev, 0.6353) / units;
-    }
+__device__ Real recomb_HeII_rate_case_A(Real T, Real units)
+{
+  Real T_ev    = T / 11605.0;
+  Real logT_ev = log(T_ev);
+  if (T_ev > 0.8) {
+    return (1.54e-9 * (1.0 + 0.3 / exp(8.099328789667 / T_ev)) / (exp(40.49664394833662 / T_ev) * pow(T_ev, 1.5)) +
+            3.92e-13 / pow(T_ev, 0.6353)) /
+           units;
+  } else {
+    return 3.92e-13 / pow(T_ev, 0.6353) / units;
+  }
 }
 // k4_rate Case B
-__device__ Real recomb_HeII_rate_case_B( Real T, Real units )
+__device__ Real recomb_HeII_rate_case_B(Real T, Real units)
 {
-    //If case B recombination on.
-    return 1.26e-14 * pow(5.7067e5/T, 0.75) / units;  
+  // If case B recombination on.
+  return 1.26e-14 * pow(5.7067e5 / T, 0.75) / units;
 }
 
-
-//Calculation of k2 (HII + e --> HI + photon)
-// k2_rate
-__device__ Real recomb_HII_rate( Real T, Real units, bool use_case_B )
+// Calculation of k2 (HII + e --> HI + photon)
+//  k2_rate
+__device__ Real recomb_HII_rate(Real T, Real units, bool use_case_B)
 {
-    if (use_case_B) {
-        if (T < 1.0e9) {
-            return 4.881357e-6*pow(T, -1.5) \
-                * pow((1.0 + 1.14813e2*pow(T, -0.407)), -2.242) / units;
-        } else {
-            return tiny;
-        }  
+  if (use_case_B) {
+    if (T < 1.0e9) {
+      return 4.881357e-6 * pow(T, -1.5) * pow((1.0 + 1.14813e2 * pow(T, -0.407)), -2.242) / units;
     } else {
-        if (T > 5500) {
-            //Convert temperature to appropriate form.
-            Real T_ev = T / tevk;
-            Real logT_ev = log(T_ev);
-
-            return exp( -28.61303380689232 \
-                - 0.7241125657826851*logT_ev \
-                - 0.02026044731984691*pow(logT_ev, 2) \
-                - 0.002380861877349834*pow(logT_ev, 3) \
-                - 0.0003212605213188796*pow(logT_ev, 4) \
-                - 0.00001421502914054107*pow(logT_ev, 5) \
-                + 4.989108920299513e-6*pow(logT_ev, 6) \
-                + 5.755614137575758e-7*pow(logT_ev, 7) \
-                - 1.856767039775261e-8*pow(logT_ev, 8) \
-                - 3.071135243196595e-9*pow(logT_ev, 9)) / units;
-        } else {
-            return recomb_HeII_rate(T, units, use_case_B);
-        }
+      return tiny;
     }
-}
-// k2_rate Case A
-__device__ Real recomb_HII_rate_case_A( Real T, Real units )
-{
+  } else {
     if (T > 5500) {
-        //Convert temperature to appropriate form.
-        Real T_ev = T / tevk;
-        Real logT_ev = log(T_ev);
-
-        return exp( -28.61303380689232 \
-            - 0.7241125657826851*logT_ev \
-            - 0.02026044731984691*pow(logT_ev, 2) \
-            - 0.002380861877349834*pow(logT_ev, 3) \
-            - 0.0003212605213188796*pow(logT_ev, 4) \
-            - 0.00001421502914054107*pow(logT_ev, 5) \
-            + 4.989108920299513e-6*pow(logT_ev, 6) \
-            + 5.755614137575758e-7*pow(logT_ev, 7) \
-            - 1.856767039775261e-8*pow(logT_ev, 8) \
-            - 3.071135243196595e-9*pow(logT_ev, 9)) / units;
+      // Convert temperature to appropriate form.
+      Real T_ev    = T / tevk;
+      Real logT_ev = log(T_ev);
+
+      return exp(-28.61303380689232 - 0.7241125657826851 * logT_ev - 0.02026044731984691 * pow(logT_ev, 2) -
+                 0.002380861877349834 * pow(logT_ev, 3) - 0.0003212605213188796 * pow(logT_ev, 4) -
+                 0.00001421502914054107 * pow(logT_ev, 5) + 4.989108920299513e-6 * pow(logT_ev, 6) +
+                 5.755614137575758e-7 * pow(logT_ev, 7) - 1.856767039775261e-8 * pow(logT_ev, 8) -
+                 3.071135243196595e-9 * pow(logT_ev, 9)) /
+             units;
     } else {
-        return recomb_HeII_rate_case_A(T, units );
+      return recomb_HeII_rate(T, units, use_case_B);
     }
+  }
+}
+// k2_rate Case A
+__device__ Real recomb_HII_rate_case_A(Real T, Real units)
+{
+  if (T > 5500) {
+    // Convert temperature to appropriate form.
+    Real T_ev    = T / tevk;
+    Real logT_ev = log(T_ev);
+
+    return exp(-28.61303380689232 - 0.7241125657826851 * logT_ev - 0.02026044731984691 * pow(logT_ev, 2) -
+               0.002380861877349834 * pow(logT_ev, 3) - 0.0003212605213188796 * pow(logT_ev, 4) -
+               0.00001421502914054107 * pow(logT_ev, 5) + 4.989108920299513e-6 * pow(logT_ev, 6) +
+               5.755614137575758e-7 * pow(logT_ev, 7) - 1.856767039775261e-8 * pow(logT_ev, 8) -
+               3.071135243196595e-9 * pow(logT_ev, 9)) /
+           units;
+  } else {
+    return recomb_HeII_rate_case_A(T, units);
+  }
 }
 
 // k2_rate Case B
-__device__ Real recomb_HII_rate_case_B( Real T, Real units )
+__device__ Real recomb_HII_rate_case_B(Real T, Real units)
 {
-    if (T < 1.0e9) {
-        return 4.881357e-6*pow(T, -1.5) \
-            * pow((1.0 + 1.14813e2*pow(T, -0.407)), -2.242) / units;
-    } else {
-        return tiny;
-    }  
+  if (T < 1.0e9) {
+    return 4.881357e-6 * pow(T, -1.5) * pow((1.0 + 1.14813e2 * pow(T, -0.407)), -2.242) / units;
+  } else {
+    return tiny;
+  }
 }
 
-
-//Calculation of k5 (HeII + e --> HeIII + 2e)
-// k5_rate
-__device__ Real coll_i_HeII_rate( Real T, Real units )
+// Calculation of k5 (HeII + e --> HeIII + 2e)
+//  k5_rate
+__device__ Real coll_i_HeII_rate(Real T, Real units)
 {
-    Real T_ev = T / 11605.0;
-    Real logT_ev = log(T_ev);
+  Real T_ev    = T / 11605.0;
+  Real logT_ev = log(T_ev);
 
-    Real k5;
-    if (T_ev > 0.8){
-        k5 = exp(-68.71040990212001
-                + 43.93347632635*logT_ev
-                - 18.48066993568*pow(logT_ev, 2)
-                + 4.701626486759002*pow(logT_ev, 3)
-                - 0.7692466334492*pow(logT_ev, 4)
-                + 0.08113042097303*pow(logT_ev, 5)
-                - 0.005324020628287001*pow(logT_ev, 6)
-                + 0.0001975705312221*pow(logT_ev, 7)
-                - 3.165581065665e-6*pow(logT_ev, 8)) / units;
-    } else {
-        k5 = tiny;
-    }
-    return k5;
+  Real k5;
+  if (T_ev > 0.8) {
+    k5 = exp(-68.71040990212001 + 43.93347632635 * logT_ev - 18.48066993568 * pow(logT_ev, 2) +
+             4.701626486759002 * pow(logT_ev, 3) - 0.7692466334492 * pow(logT_ev, 4) +
+             0.08113042097303 * pow(logT_ev, 5) - 0.005324020628287001 * pow(logT_ev, 6) +
+             0.0001975705312221 * pow(logT_ev, 7) - 3.165581065665e-6 * pow(logT_ev, 8)) /
+         units;
+  } else {
+    k5 = tiny;
+  }
+  return k5;
 }
 
-//Calculation of k6 (HeIII + e --> HeII + photon)
-// k6_rate
-__device__ Real recomb_HeIII_rate( Real T, Real units, bool use_case_B )
+// Calculation of k6 (HeIII + e --> HeII + photon)
+//  k6_rate
+__device__ Real recomb_HeIII_rate(Real T, Real units, bool use_case_B)
 {
-    Real k6;
-    //Has case B recombination setting.
-    if (use_case_B) {
-        if (T < 1.0e9) {
-            k6 = 7.8155e-5*pow(T, -1.5)
-                * pow((1.0 + 2.0189e2*pow(T, -0.407)), -2.242) / units;
-        } else {
-            k6 = tiny;
-        }
+  Real k6;
+  // Has case B recombination setting.
+  if (use_case_B) {
+    if (T < 1.0e9) {
+      k6 = 7.8155e-5 * pow(T, -1.5) * pow((1.0 + 2.0189e2 * pow(T, -0.407)), -2.242) / units;
     } else {
-        k6 = 3.36e-10/sqrt(T)/pow(T/1.0e3, 0.2)
-             / (1.0 + pow(T/1.0e6, 0.7)) / units;
+      k6 = tiny;
     }
-    return k6;
+  } else {
+    k6 = 3.36e-10 / sqrt(T) / pow(T / 1.0e3, 0.2) / (1.0 + pow(T / 1.0e6, 0.7)) / units;
+  }
+  return k6;
 }
 // k6_rate Case A
-__device__ Real recomb_HeIII_rate_case_A( Real T, Real units )
+__device__ Real recomb_HeIII_rate_case_A(Real T, Real units)
 {
-    Real k6;
-    //Has case B recombination setting.
-    k6 = 3.36e-10/sqrt(T)/pow(T/1.0e3, 0.2)
-         / (1.0 + pow(T/1.0e6, 0.7)) / units;
-    return k6;
+  Real k6;
+  // Has case B recombination setting.
+  k6 = 3.36e-10 / sqrt(T) / pow(T / 1.0e3, 0.2) / (1.0 + pow(T / 1.0e6, 0.7)) / units;
+  return k6;
 }
 // k6_rate Case B
-__device__ Real recomb_HeIII_rate_case_B( Real T, Real units )
-{
-    Real k6;
-    //Has case B recombination setting.
-    if (T < 1.0e9) {
-        k6 = 7.8155e-5*pow(T, -1.5)
-            * pow((1.0 + 2.0189e2*pow(T, -0.407)), -2.242) / units;
-    } else {
-        k6 = tiny;
-    }
-    return k6;
+__device__ Real recomb_HeIII_rate_case_B(Real T, Real units)
+{
+  Real k6;
+  // Has case B recombination setting.
+  if (T < 1.0e9) {
+    k6 = 7.8155e-5 * pow(T, -1.5) * pow((1.0 + 2.0189e2 * pow(T, -0.407)), -2.242) / units;
+  } else {
+    k6 = tiny;
+  }
+  return k6;
 }
 
-//Calculation of k57 (HI + HI --> HII + HI + e)
-// k57_rate
-__device__ Real coll_i_HI_HI_rate( Real T, Real units )
+// Calculation of k57 (HI + HI --> HII + HI + e)
+//  k57_rate
+__device__ Real coll_i_HI_HI_rate(Real T, Real units)
 {
-    // These rate coefficients are from Lenzuni, Chernoff & Salpeter (1991).
-    // k57 value based on experimental cross-sections from Gealy & van Zyl (1987).
-    if (T > 3.0e3) {
-        return 1.2e-17  * pow(T, 1.2) * exp(-1.578e5 / T) / units;
-    } else {
-        return tiny;
-    }
+  // These rate coefficients are from Lenzuni, Chernoff & Salpeter (1991).
+  // k57 value based on experimental cross-sections from Gealy & van Zyl (1987).
+  if (T > 3.0e3) {
+    return 1.2e-17 * pow(T, 1.2) * exp(-1.578e5 / T) / units;
+  } else {
+    return tiny;
+  }
 }
 
-//Calculation of k58 (HI + HeI --> HII + HeI + e)
-// k58_rate
-__device__ Real coll_i_HI_HeI_rate( Real T, Real units )
+// Calculation of k58 (HI + HeI --> HII + HeI + e)
+//  k58_rate
+__device__ Real coll_i_HI_HeI_rate(Real T, Real units)
 {
-    // These rate coefficients are from Lenzuni, Chernoff & Salpeter (1991).
-    // k58 value based on cross-sections from van Zyl, Le & Amme (1981).
-    if (T > 3.0e3) {
-        return 1.75e-17 * pow(T, 1.3) * exp(-1.578e5 / T) / units;
-    } else {
-        return tiny;
-    }
+  // These rate coefficients are from Lenzuni, Chernoff & Salpeter (1991).
+  // k58 value based on cross-sections from van Zyl, Le & Amme (1981).
+  if (T > 3.0e3) {
+    return 1.75e-17 * pow(T, 1.3) * exp(-1.578e5 / T) / units;
+  } else {
+    return tiny;
+  }
 }
 
-//Calculation of ceHI.
-// Cooling collisional excitation HI
-__host__ __device__ Real cool_ceHI_rate( Real T, Real units )
+// Calculation of ceHI.
+//  Cooling collisional excitation HI
+__host__ __device__ Real cool_ceHI_rate(Real T, Real units)
 {
-    return 7.5e-19*exp( -fmin(log(dhuge), 118348.0 / T) )
-            / ( 1.0 + sqrt(T / 1.0e5) ) / units;    
+  return 7.5e-19 * exp(-fmin(log(dhuge), 118348.0 / T)) / (1.0 + sqrt(T / 1.0e5)) / units;
 }
 
-//Calculation of ceHeI.
-// Cooling collisional ionization HeI
-__host__ __device__ Real cool_ceHeI_rate( Real T, Real units )
+// Calculation of ceHeI.
+//  Cooling collisional ionization HeI
+__host__ __device__ Real cool_ceHeI_rate(Real T, Real units)
 {
-    return 9.1e-27*exp(-fmin(log(dhuge), 13179.0/T))
-            * pow(T, -0.1687) / ( 1.0 + sqrt(T/1.0e5) ) / units;
+  return 9.1e-27 * exp(-fmin(log(dhuge), 13179.0 / T)) * pow(T, -0.1687) / (1.0 + sqrt(T / 1.0e5)) / units;
 }
 
-//Calculation of ceHeII.
-// Cooling collisional excitation HeII
-__host__ __device__ Real cool_ceHeII_rate( Real T, Real units )
+// Calculation of ceHeII.
+//  Cooling collisional excitation HeII
+__host__ __device__ Real cool_ceHeII_rate(Real T, Real units)
 {
-    return 5.54e-17*exp(-fmin(log(dhuge), 473638.0/T))
-            * pow(T, -0.3970) / ( 1.0 + sqrt(T/1.0e5) ) / units;
+  return 5.54e-17 * exp(-fmin(log(dhuge), 473638.0 / T)) * pow(T, -0.3970) / (1.0 + sqrt(T / 1.0e5)) / units;
 }
 
-//Calculation of ciHeIS.
-// Cooling collisional ionization HeIS
-__host__ __device__ Real cool_ciHeIS_rate( Real T, Real units )
+// Calculation of ciHeIS.
+//  Cooling collisional ionization HeIS
+__host__ __device__ Real cool_ciHeIS_rate(Real T, Real units)
 {
-    return 5.01e-27*pow(T, -0.1687) / ( 1.0 + sqrt(T/1.0e5) )
-              * exp(-fmin(log(dhuge), 55338.0/T)) / units;
+  return 5.01e-27 * pow(T, -0.1687) / (1.0 + sqrt(T / 1.0e5)) * exp(-fmin(log(dhuge), 55338.0 / T)) / units;
 }
 
-//Calculation of ciHI.
-// Cooling collisional ionization HI
-__host__ __device__ Real cool_ciHI_rate( Real T, Real units )
+// Calculation of ciHI.
+//  Cooling collisional ionization HI
+__host__ __device__ Real cool_ciHI_rate(Real T, Real units)
 {
-    //Collisional ionization. Polynomial fit from Tom Abel.
-    return 2.18e-11 * coll_i_HI_rate(T, 1) / units;    
+  // Collisional ionization. Polynomial fit from Tom Abel.
+  return 2.18e-11 * coll_i_HI_rate(T, 1) / units;
 }
 
-
-//Calculation of ciHeI.
-// Cooling collisional ionization HeI
-__host__ __device__ Real cool_ciHeI_rate( Real T, Real units )
+// Calculation of ciHeI.
+//  Cooling collisional ionization HeI
+__host__ __device__ Real cool_ciHeI_rate(Real T, Real units)
 {
-    //Collisional ionization. Polynomial fit from Tom Abel.
-    return 3.94e-11 * coll_i_HeI_rate(T, 1) / units;
+  // Collisional ionization. Polynomial fit from Tom Abel.
+  return 3.94e-11 * coll_i_HeI_rate(T, 1) / units;
 }
 
-//Calculation of ciHeII.
-// Cooling collisional ionization HeII
-__host__ __device__ Real cool_ciHeII_rate( Real T, Real units )
+// Calculation of ciHeII.
+//  Cooling collisional ionization HeII
+__host__ __device__ Real cool_ciHeII_rate(Real T, Real units)
 {
-    //Collisional ionization. Polynomial fit from Tom Abel.
-    return 8.72e-11 * coll_i_HeII_rate(T, 1) / units; 
+  // Collisional ionization. Polynomial fit from Tom Abel.
+  return 8.72e-11 * coll_i_HeII_rate(T, 1) / units;
 }
 
-//Calculation of reHII.
-// Cooling recombination HII
-__host__ __device__ Real cool_reHII_rate( Real T, Real units, bool use_case_B )
+// Calculation of reHII.
+//  Cooling recombination HII
+__host__ __device__ Real cool_reHII_rate(Real T, Real units, bool use_case_B)
 {
-    Real lambdaHI    = 2.0 * 157807.0 / T;
-    if (use_case_B) {
-        return 3.435e-30 * T * pow(lambdaHI, 1.970)
-                / pow( 1.0 + pow(lambdaHI/2.25, 0.376), 3.720)
-                / units;
-    } else {
-        return 1.778e-29 * T * pow(lambdaHI, 1.965)
-                / pow(1.0 + pow(lambdaHI/0.541, 0.502), 2.697)
-                / units; 
-    }
+  Real lambdaHI = 2.0 * 157807.0 / T;
+  if (use_case_B) {
+    return 3.435e-30 * T * pow(lambdaHI, 1.970) / pow(1.0 + pow(lambdaHI / 2.25, 0.376), 3.720) / units;
+  } else {
+    return 1.778e-29 * T * pow(lambdaHI, 1.965) / pow(1.0 + pow(lambdaHI / 0.541, 0.502), 2.697) / units;
+  }
 }
 
-//Calculation of reHII.
-// Cooling recombination HII Case A
-__host__ __device__ Real cool_reHII_rate_case_A( Real T, Real units )
+// Calculation of reHII.
+//  Cooling recombination HII Case A
+__host__ __device__ Real cool_reHII_rate_case_A(Real T, Real units)
 {
-    Real lambdaHI    = 2.0 * 157807.0 / T;
-    return 1.778e-29 * T * pow(lambdaHI, 1.965)
-            / pow(1.0 + pow(lambdaHI/0.541, 0.502), 2.697)
-            / units; 
+  Real lambdaHI = 2.0 * 157807.0 / T;
+  return 1.778e-29 * T * pow(lambdaHI, 1.965) / pow(1.0 + pow(lambdaHI / 0.541, 0.502), 2.697) / units;
 }
 
-//Calculation of reHII.
-// Cooling recombination HII Case B
-__host__ __device__ Real cool_reHII_rate_case_B( Real T, Real units )
+// Calculation of reHII.
+//  Cooling recombination HII Case B
+__host__ __device__ Real cool_reHII_rate_case_B(Real T, Real units)
 {
-    Real lambdaHI    = 2.0 * 157807.0 / T;
-    return 3.435e-30 * T * pow(lambdaHI, 1.970)
-            / pow( 1.0 + pow(lambdaHI/2.25, 0.376), 3.720)
-            / units;
+  Real lambdaHI = 2.0 * 157807.0 / T;
+  return 3.435e-30 * T * pow(lambdaHI, 1.970) / pow(1.0 + pow(lambdaHI / 2.25, 0.376), 3.720) / units;
 }
 
-//Calculation of reHII.
-// Cooling recombination HeII
-__host__ __device__ Real cool_reHeII1_rate( Real T, Real units, bool use_case_B )
+// Calculation of reHII.
+//  Cooling recombination HeII
+__host__ __device__ Real cool_reHeII1_rate(Real T, Real units, bool use_case_B)
 {
-    Real lambdaHeII  = 2.0 * 285335.0 / T;
-    if ( use_case_B ) {
-        return 1.26e-14 * kboltz * T * pow(lambdaHeII, 0.75)
-                        / units;
-    } else {
-        return 3e-14 * kboltz * T * pow(lambdaHeII, 0.654)
-                / units;
-    }    
+  Real lambdaHeII = 2.0 * 285335.0 / T;
+  if (use_case_B) {
+    return 1.26e-14 * kboltz * T * pow(lambdaHeII, 0.75) / units;
+  } else {
+    return 3e-14 * kboltz * T * pow(lambdaHeII, 0.654) / units;
+  }
 }
 
-//Calculation of reHII.
-// Cooling recombination HeII Case A
-__host__ __device__ Real cool_reHeII1_rate_case_A( Real T, Real units )
+// Calculation of reHII.
+//  Cooling recombination HeII Case A
+__host__ __device__ Real cool_reHeII1_rate_case_A(Real T, Real units)
 {
-    Real lambdaHeII  = 2.0 * 285335.0 / T;
-    return 3e-14 * kboltz * T * pow(lambdaHeII, 0.654)
-              / units;
+  Real lambdaHeII = 2.0 * 285335.0 / T;
+  return 3e-14 * kboltz * T * pow(lambdaHeII, 0.654) / units;
 }
 
-//Calculation of reHII.
-// Cooling recombination HeII Case B
-__host__ __device__ Real cool_reHeII1_rate_case_B( Real T, Real units )
+// Calculation of reHII.
+//  Cooling recombination HeII Case B
+__host__ __device__ Real cool_reHeII1_rate_case_B(Real T, Real units)
 {
-    Real lambdaHeII  = 2.0 * 285335.0 / T;
-    return 1.26e-14 * kboltz * T * pow(lambdaHeII, 0.75)
-                    / units;    
+  Real lambdaHeII = 2.0 * 285335.0 / T;
+  return 1.26e-14 * kboltz * T * pow(lambdaHeII, 0.75) / units;
 }
 
-//Calculation of reHII2.
-// Cooling recombination HeII Dielectronic
-__host__ __device__ Real cool_reHeII2_rate( Real T, Real units )
+// Calculation of reHII2.
+//  Cooling recombination HeII Dielectronic
+__host__ __device__ Real cool_reHeII2_rate(Real T, Real units)
 {
-    //Dielectronic recombination (Cen, 1992).
-    return 1.24e-13 * pow(T, -1.5)
-            * exp( -fmin(log(dhuge), 470000.0 / T) )
-            * ( 1.0 + 0.3 * exp( -fmin(log(dhuge), 94000.0 / T) ) ) 
-            / units;
+  // Dielectronic recombination (Cen, 1992).
+  return 1.24e-13 * pow(T, -1.5) * exp(-fmin(log(dhuge), 470000.0 / T)) *
+         (1.0 + 0.3 * exp(-fmin(log(dhuge), 94000.0 / T))) / units;
 }
 
-//Calculation of reHIII.
-// Cooling recombination HeIII
-__host__ __device__ Real cool_reHeIII_rate( Real T, Real units, bool use_case_B )
+// Calculation of reHIII.
+//  Cooling recombination HeIII
+__host__ __device__ Real cool_reHeIII_rate(Real T, Real units, bool use_case_B)
 {
-    Real lambdaHeIII = 2.0 * 631515.0 / T;
-    if ( use_case_B ) {
-        return 8.0 * 3.435e-30 * T * pow(lambdaHeIII, 1.970)
-                / pow(1.0 + pow(lambdaHeIII / 2.25, 0.376), 3.720) 
-                / units;
-    } else {
-        return 8.0 * 1.778e-29 * T * pow(lambdaHeIII, 1.965)
-                / pow(1.0 + pow(lambdaHeIII / 0.541, 0.502), 2.697)
-                / units;
-    }
+  Real lambdaHeIII = 2.0 * 631515.0 / T;
+  if (use_case_B) {
+    return 8.0 * 3.435e-30 * T * pow(lambdaHeIII, 1.970) / pow(1.0 + pow(lambdaHeIII / 2.25, 0.376), 3.720) / units;
+  } else {
+    return 8.0 * 1.778e-29 * T * pow(lambdaHeIII, 1.965) / pow(1.0 + pow(lambdaHeIII / 0.541, 0.502), 2.697) / units;
+  }
 }
 
-//Calculation of reHIII.
-// Cooling recombination HeIII Case A
-__host__ __device__ Real cool_reHeIII_rate_case_A( Real T, Real units )
+// Calculation of reHIII.
+//  Cooling recombination HeIII Case A
+__host__ __device__ Real cool_reHeIII_rate_case_A(Real T, Real units)
 {
-    Real lambdaHeIII = 2.0 * 631515.0 / T;
-    return 8.0 * 1.778e-29 * T * pow(lambdaHeIII, 1.965)
-            / pow(1.0 + pow(lambdaHeIII / 0.541, 0.502), 2.697)
-            / units;
+  Real lambdaHeIII = 2.0 * 631515.0 / T;
+  return 8.0 * 1.778e-29 * T * pow(lambdaHeIII, 1.965) / pow(1.0 + pow(lambdaHeIII / 0.541, 0.502), 2.697) / units;
 }
 
-//Calculation of reHIII.
-// Cooling recombination HeIII Case B
-__host__ __device__ Real cool_reHeIII_rate_case_B( Real T, Real units )
+// Calculation of reHIII.
+//  Cooling recombination HeIII Case B
+__host__ __device__ Real cool_reHeIII_rate_case_B(Real T, Real units)
 {
-    Real lambdaHeIII = 2.0 * 631515.0 / T;
-    return 8.0 * 3.435e-30 * T * pow(lambdaHeIII, 1.970)
-            / pow(1.0 + pow(lambdaHeIII / 2.25, 0.376), 3.720) 
-            / units;
+  Real lambdaHeIII = 2.0 * 631515.0 / T;
+  return 8.0 * 3.435e-30 * T * pow(lambdaHeIII, 1.970) / pow(1.0 + pow(lambdaHeIII / 2.25, 0.376), 3.720) / units;
 }
-//Calculation of brem.
-// Cooling Bremsstrahlung
-__host__ __device__ Real cool_brem_rate( Real T, Real units )
+// Calculation of brem.
+//  Cooling Bremsstrahlung
+__host__ __device__ Real cool_brem_rate(Real T, Real units)
 {
-    return 1.43e-27 * sqrt(T)
-            * ( 1.1 + 0.34 * exp( -pow(5.5 - log10(T), 2) / 3.0) )
-            / units;    
+  return 1.43e-27 * sqrt(T) * (1.1 + 0.34 * exp(-pow(5.5 - log10(T), 2) / 3.0)) / units;
 }
 
-
-
-
-
-#endif
\ No newline at end of file
+#endif
diff --git a/src/chemistry_gpu/chemistry_gpu.h b/src/chemistry_gpu/chemistry_gpu.h
index 751059f07..79674c3a0 100644
--- a/src/chemistry_gpu/chemistry_gpu.h
+++ b/src/chemistry_gpu/chemistry_gpu.h
@@ -1,26 +1,23 @@
 #ifndef CHEMISTRY_GPU_H
 #define CHEMISTRY_GPU_H
 
-#include"../global/global.h"
+#include "../global/global.h"
 
 #define CHEM_TINY 1e-20
 
-//Define the type of a generic rate function.
-typedef Real (*Rate_Function_T)( Real, Real );
-
+// Define the type of a generic rate function.
+typedef Real (*Rate_Function_T)(Real, Real);
 
 // #define TEXTURES_UVB_INTERPOLATION
 
-struct Chemistry_Header
-{
+struct ChemistryHeader {
   Real gamma;
   Real density_conversion;
   Real energy_conversion;
   Real current_z;
   Real runtime_chemistry_step;
   Real H_fraction;
-  
-  
+
   // Units system
   Real a_value;
   Real density_units;
@@ -30,45 +27,45 @@ struct Chemistry_Header
   Real cooling_units;
   Real reaction_units;
   Real dens_number_conv;
-  
+
   // Cosmological parameters
   Real H0;
   Real Omega_M;
   Real Omega_L;
-    
+
   // Interpolation tables for the rates
-  int  N_Temp_bins;
+  int N_Temp_bins;
   Real Temp_start;
   Real Temp_end;
-  
+
   Real *cool_ceHI_d;
   Real *cool_ceHeI_d;
   Real *cool_ceHeII_d;
-  
+
   Real *cool_ciHI_d;
   Real *cool_ciHeI_d;
   Real *cool_ciHeII_d;
   Real *cool_ciHeIS_d;
-  
+
   Real *cool_reHII_d;
-  Real *cool_reHeII1_d;
-  Real *cool_reHeII2_d;
+  Real *cool_reHeII_1_d;
+  Real *cool_reHeII_2_d;
   Real *cool_reHeIII_d;
-  
+
   Real *cool_brem_d;
-  
+
   Real cool_compton;
-  
+
   Real *k_coll_i_HI_d;
   Real *k_coll_i_HeI_d;
   Real *k_coll_i_HeII_d;
   Real *k_coll_i_HI_HI_d;
   Real *k_coll_i_HI_HeI_d;
-  
+
   Real *k_recomb_HII_d;
   Real *k_recomb_HeII_d;
   Real *k_recomb_HeIII_d;
-  
+
   int max_iter;
 
   int n_uvb_rates_samples;
@@ -79,32 +76,26 @@ struct Chemistry_Header
   float *photo_heat_HI_rate_d;
   float *photo_heat_HeI_rate_d;
   float *photo_heat_HeII_rate_d;
-  
 };
 
-
-
-
 #ifdef CHEMISTRY_GPU
 
 class Chem_GPU
 {
-public:
-  
+ public:
   int nx;
   int ny;
   int nz;
-  
-  
+
   bool use_case_B_recombination;
-  
+
   Real scale_factor_UVB_on;
 
   float *cosmo_params_h;
   float *cosmo_params_d;
-  
+
   int n_uvb_rates_samples;
-  float *rates_z_h;  
+  float *rates_z_h;
   float *Heat_rates_HI_h;
   float *Heat_rates_HeI_h;
   float *Heat_rates_HeII_h;
@@ -119,52 +110,49 @@ class Chem_GPU
   float *Ion_rates_HI_d;
   float *Ion_rates_HeI_d;
   float *Ion_rates_HeII_d;
-  
+
   struct Chemistry_Header H;
-  
-  
-  struct Fields
-  {
+
+  struct Fields {
     Real *temperature_h;
   } Fields;
-  
-  
-  void Allocate_Array_GPU_Real( Real **array_dev, int size );
-  void Copy_Real_Array_to_Device( int size, Real *array_h, Real *array_d );
-  void Free_Array_GPU_Real( Real *array_dev );
-  void Allocate_Array_GPU_float( float **array_dev, int size );
-  void Copy_Float_Array_to_Device( int size, float *array_h, float *array_d );
-  void Free_Array_GPU_float( float *array_dev );
-      
-  void Initialize( struct parameters *P );
-  
-  void Generate_Reaction_Rate_Table( Real **rate_table_array_d, Rate_Function_T rate_function, Real units  );
-  
+
+  void Allocate_Array_GPU_Real(Real **array_dev, int size);
+  void Copy_Real_Array_to_Device(int size, Real *array_h, Real *array_d);
+  void Free_Array_GPU_Real(Real *array_dev);
+  void Allocate_Array_GPU_float(float **array_dev, int size);
+  void Copy_Float_Array_to_Device(int size, float *array_h, float *array_d);
+  void Free_Array_GPU_float(float *array_dev);
+
+  void Initialize(struct Parameters *P);
+
+  void Generate_Reaction_Rate_Table(Real **rate_table_array_d, Rate_Function_T rate_function, Real units);
+
   void Initialize_Cooling_Rates();
-  
-    void Initialize_Reaction_Rates();
-  
-  void Initialize_UVB_Ionization_and_Heating_Rates( struct parameters *P );
-  
-  void Load_UVB_Ionization_and_Heating_Rates(  struct parameters *P );
-  
-  void Copy_UVB_Rates_to_GPU();
-    
-  void Reset( );
-  
-  #ifdef TEXTURES_UVB_INTERPOLATION
-  void Bind_GPU_Textures( int size,  float *H_HI_h, float *H_HeI_h, float *H_HeII_h , float *I_HI_h, float *I_HeI_h, float *I_HeII_h );
-  #endif
 
-};
+  void Initialize_Reaction_Rates();
+
+  void Initialize_UVB_Ionization_and_Heating_Rates(struct Parameters *P);
+
+  void Load_UVB_Ionization_and_Heating_Rates(struct Parameters *P);
 
+  void Copy_UVB_Rates_to_GPU();
 
-/*! \fn void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma)
-*  \brief When passed an array of conserved variables and a timestep, update the ionization fractions of H and He and update 
-the internal energy to account for radiative cooling and photoheating from the UV background. */
-void Do_Chemistry_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Chemistry_Header &Chem_H);
+  void Reset();
 
+  #ifdef TEXTURES_UVB_INTERPOLATION
+  void Bind_GPU_Textures(int size, float *H_HI_h, float *H_HeI_h, float *H_HeII_h, float *I_HI_h, float *I_HeI_h,
+                         float *I_HeII_h);
+  #endif
+};
 
+/*! \fn void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int
+n_ghost, int n_fields, Real dt, Real gamma)
+*  \brief When passed an array of conserved variables and a timestep, update the
+ionization fractions of H and He and update the internal energy to account for
+radiative cooling and photoheating from the UV background. */
+void Do_Chemistry_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt,
+                         Chemistry_Header &Chem_H);
 
 #endif
 #endif
\ No newline at end of file
diff --git a/src/chemistry_gpu/chemistry_io.cpp b/src/chemistry_gpu/chemistry_io.cpp
index 20cb53d7b..20d23318e 100644
--- a/src/chemistry_gpu/chemistry_io.cpp
+++ b/src/chemistry_gpu/chemistry_io.cpp
@@ -1,110 +1,92 @@
 #ifdef CHEMISTRY_GPU
 
-#include <iostream>
-#include <vector>
-#include <string>
-#include <sstream>
-#include <fstream>
-#include "chemistry_gpu.h"
-#include "../io/io.h"
-
-
-using namespace std;
-
-
-void Chem_GPU::Load_UVB_Ionization_and_Heating_Rates(  struct parameters *P ){
-
-
+  #include <cstring>  // provides std::strcpy (strcpy in this file)
+  #include <fstream>
+  #include <iostream>
+  #include <sstream>
+  #include <string>
+  #include <vector>
+
+  #include "../io/io.h"
+  #include "chemistry_gpu.h"
+
+void Chem_GPU::Load_UVB_Ionization_and_Heating_Rates(struct Parameters *P)
+{
   char uvb_filename[100];
   // create the filename to read from
   strcpy(uvb_filename, P->UVB_rates_file);
-  chprintf( " Loading UVB rates: %s\n", uvb_filename);
-  
+  chprintf(" Loading UVB rates: %s\n", uvb_filename);
 
-  
   std::fstream in(uvb_filename);
   std::string line;
   std::vector<std::vector<float>> v;
   int i = 0;
-  if (in.is_open()){
-    while (std::getline(in, line))
-    {
-       if ( line.find("#") == 0 ) continue;
-      
-       float value;
-       std::stringstream ss(line);
-       // chprintf( "%s \n", line.c_str() );
-       v.push_back(std::vector<float>());
-       
-       while (ss >> value){
-         v[i].push_back(value);
-       }
-       i += 1;    
+  if (in.is_open()) {
+    while (std::getline(in, line)) {
+      if (line.find("#") == 0) continue;
+
+      float value;
+      std::stringstream ss(line);
+      // chprintf( "%s \n", line.c_str() );
+      v.push_back(std::vector<float>());
+
+      while (ss >> value) {
+        v[i].push_back(value);
+      }
+      i += 1;
     }
     in.close();
-  } else{
+  } else {
     chprintf(" Error: Unable to open UVB rates file: %s\n", uvb_filename);
     exit(1);
   }
-  
+
   int n_lines = i;
-  
-  chprintf( " Loaded %d lines in file\n", n_lines);
-  
-  rates_z_h         = (float *)malloc(sizeof(float)*n_lines);
-  Heat_rates_HI_h   = (float *)malloc(sizeof(float)*n_lines);
-  Heat_rates_HeI_h  = (float *)malloc(sizeof(float)*n_lines);
-  Heat_rates_HeII_h = (float *)malloc(sizeof(float)*n_lines);
-  Ion_rates_HI_h    = (float *)malloc(sizeof(float)*n_lines);
-  Ion_rates_HeI_h   = (float *)malloc(sizeof(float)*n_lines);
-  Ion_rates_HeII_h  = (float *)malloc(sizeof(float)*n_lines);
-  
-  Real eV_to_ergs, heat_units, ion_units; 
+
+  chprintf(" Loaded %d lines in file\n", n_lines);
+
+  rates_z_h         = (float *)malloc(sizeof(float) * n_lines);
+  Heat_rates_HI_h   = (float *)malloc(sizeof(float) * n_lines);
+  Heat_rates_HeI_h  = (float *)malloc(sizeof(float) * n_lines);
+  Heat_rates_HeII_h = (float *)malloc(sizeof(float) * n_lines);
+  Ion_rates_HI_h    = (float *)malloc(sizeof(float) * n_lines);
+  Ion_rates_HeI_h   = (float *)malloc(sizeof(float) * n_lines);
+  Ion_rates_HeII_h  = (float *)malloc(sizeof(float) * n_lines);
+
+  Real eV_to_ergs, heat_units, ion_units;
   eV_to_ergs = 1.60218e-12;
   heat_units = eV_to_ergs / H.cooling_units;
   ion_units  = H.time_units;
-  
-  for (i=0; i<n_lines; i++ ){
-    rates_z_h[i] = v[i][0] ;
+
+  for (i = 0; i < n_lines; i++) {
+    rates_z_h[i]         = v[i][0];
     Ion_rates_HI_h[i]    = v[i][1] * ion_units;
     Heat_rates_HI_h[i]   = v[i][2] * heat_units;
     Ion_rates_HeI_h[i]   = v[i][3] * ion_units;
     Heat_rates_HeI_h[i]  = v[i][4] * heat_units;
     Ion_rates_HeII_h[i]  = v[i][5] * ion_units;
     Heat_rates_HeII_h[i] = v[i][6] * heat_units;
-    // chprintf( " %f  %e  %e  %e   \n", rates_z_h[i], Heat_rates_HI_h[i],  Heat_rates_HeI_h[i],  Heat_rates_HeII_h[i]);
-    // chprintf( " %f  %f  \n", rates_z_h[i], Heat_rates_HI_h[i] );
+    // chprintf( " %f  %e  %e  %e   \n", rates_z_h[i], Heat_rates_HI_h[i],
+    // Heat_rates_HeI_h[i],  Heat_rates_HeII_h[i]); chprintf( " %f  %f  \n",
+    // rates_z_h[i], Heat_rates_HI_h[i] );
   }
-  
-  for ( i=0; i<n_lines-1; i++ ){
-    if ( rates_z_h[i] > rates_z_h[i+1] ){
-      chprintf( " ERROR: UVB rates must be ordered such that redshift is increasing as the rows increase in the file\n", uvb_filename);
+
+  for (i = 0; i < n_lines - 1; i++) {
+    if (rates_z_h[i] > rates_z_h[i + 1]) {
+      chprintf(
+          " ERROR: UVB rates must be ordered such that redshift is increasing "
+          "as the rows increase in the file\n",
+          uvb_filename);
       exit(2);
     }
   }
-  
+
   n_uvb_rates_samples = n_lines;
-  scale_factor_UVB_on = 1 / (rates_z_h[n_uvb_rates_samples-1] + 1 );
+  scale_factor_UVB_on = 1 / (rates_z_h[n_uvb_rates_samples - 1] + 1);
   chprintf(" Loaded UVB rates: \n");
-  chprintf("  N redshift values: %d \n", n_uvb_rates_samples );
-  chprintf("  z_min = %f    z_max = %f \n", rates_z_h[0], rates_z_h[n_uvb_rates_samples-1] );
-  chprintf("  UVB on:  a=%f \n", scale_factor_UVB_on  );
-  
-
+  chprintf("  N redshift values: %d \n", n_uvb_rates_samples);
+  chprintf("  z_min = %f    z_max = %f \n", rates_z_h[0], rates_z_h[n_uvb_rates_samples - 1]);
+  chprintf("  UVB on:  a=%f \n", scale_factor_UVB_on);
 }
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-#endif
\ No newline at end of file
+#endif
diff --git a/src/chemistry_gpu/rates.cuh b/src/chemistry_gpu/rates.cuh
index 5a9455824..bf09aabc9 100644
--- a/src/chemistry_gpu/rates.cuh
+++ b/src/chemistry_gpu/rates.cuh
@@ -1,170 +1,162 @@
 #ifdef CHEMISTRY_GPU
 
-#include "chemistry_gpu.h"
-#include"../global/global_cuda.h"
-
-
+  #include "../global/global_cuda.h"
+  #include "chemistry_gpu.h"
 
 // Calculation of k1 (HI + e --> HII + 2e)
 // k1_rate
-__host__ __device__ Real coll_i_HI_rate(Real T, Real units );
+__host__ __device__ Real coll_i_HI_rate(Real T, Real units);
 
-//Calculation of k3 (HeI + e --> HeII + 2e)
-// k3_rate
-__host__ __device__ Real coll_i_HeI_rate(Real T, Real units );
+// Calculation of k3 (HeI + e --> HeII + 2e)
+//  k3_rate
+__host__ __device__ Real coll_i_HeI_rate(Real T, Real units);
 
-//Calculation of k4 (HeII + e --> HeI + photon)
-// k4_rate
-__host__ __device__ Real recomb_HeII_rate(Real T, Real units, bool use_case_B );
+// Calculation of k4 (HeII + e --> HeI + photon)
+//  k4_rate
+__host__ __device__ Real recomb_HeII_rate(Real T, Real units, bool use_case_B);
 // k4_rate Case A
-__host__ __device__ Real recomb_HeII_rate_case_A(Real T, Real units );
+__host__ __device__ Real recomb_HeII_rate_case_A(Real T, Real units);
 // k4_rate Case B
-__host__ __device__ Real recomb_HeII_rate_case_B(Real T, Real units );
+__host__ __device__ Real recomb_HeII_rate_case_B(Real T, Real units);
 
-//Calculation of k2 (HII + e --> HI + photon)
-// k2_rate
-__host__ __device__ Real recomb_HII_rate(Real T, Real units, bool use_case_B );
+// Calculation of k2 (HII + e --> HI + photon)
+//  k2_rate
+__host__ __device__ Real recomb_HII_rate(Real T, Real units, bool use_case_B);
 // k2_rate Case A
-__host__ __device__ Real recomb_HII_rate_case_A(Real T, Real units );
+__host__ __device__ Real recomb_HII_rate_case_A(Real T, Real units);
 // k2_rate Case B
-__host__ __device__ Real recomb_HII_rate_case_B(Real T, Real units );
+__host__ __device__ Real recomb_HII_rate_case_B(Real T, Real units);
 
-//Calculation of k5 (HeII + e --> HeIII + 2e)
-// k5_rate
-__host__ __device__ Real coll_i_HeII_rate(Real T, Real units );
+// Calculation of k5 (HeII + e --> HeIII + 2e)
+//  k5_rate
+__host__ __device__ Real coll_i_HeII_rate(Real T, Real units);
 
-//Calculation of k6 (HeIII + e --> HeII + photon)
-// k6_rate
-__host__ __device__ Real recomb_HeIII_rate(Real T, Real units, bool use_case_B );
+// Calculation of k6 (HeIII + e --> HeII + photon)
+//  k6_rate
+__host__ __device__ Real recomb_HeIII_rate(Real T, Real units, bool use_case_B);
 // k6_rate Case A
-__host__ __device__ Real recomb_HeIII_rate_case_A(Real T, Real units );
+__host__ __device__ Real recomb_HeIII_rate_case_A(Real T, Real units);
 // k6_rate Case B
-__host__ __device__ Real recomb_HeIII_rate_case_B(Real T, Real units );
-
-//Calculation of k57 (HI + HI --> HII + HI + e)
-// k57_rate
-__host__ __device__ Real coll_i_HI_HI_rate(Real T, Real units );
+__host__ __device__ Real recomb_HeIII_rate_case_B(Real T, Real units);
 
-//Calculation of k58 (HI + HeI --> HII + HeI + e)
-// k58_rate
-__host__ __device__ Real coll_i_HI_HeI_rate(Real T, Real units );
+// Calculation of k57 (HI + HI --> HII + HI + e)
+//  k57_rate
+__host__ __device__ Real coll_i_HI_HI_rate(Real T, Real units);
 
-//Calculation of ceHI.
-// Cooling collisional excitation HI
-__host__ __device__ Real cool_ceHI_rate(Real T, Real units );
+// Calculation of k58 (HI + HeI --> HII + HeI + e)
+//  k58_rate
+__host__ __device__ Real coll_i_HI_HeI_rate(Real T, Real units);
 
-//Calculation of ceHeI.
-// Cooling collisional ionization HeI
-__host__ __device__ Real cool_ceHeI_rate(Real T, Real units );
+// Calculation of ceHI.
+//  Cooling collisional excitation HI
+__host__ __device__ Real cool_ceHI_rate(Real T, Real units);
 
-//Calculation of ceHeII.
-// Cooling collisional excitation HeII
-__host__ __device__ Real cool_ceHeII_rate(Real T, Real units );
+// Calculation of ceHeI.
+//  Cooling collisional ionization HeI
+__host__ __device__ Real cool_ceHeI_rate(Real T, Real units);
 
-//Calculation of ciHeIS.
-// Cooling collisional ionization HeIS
-__host__ __device__ Real cool_ciHeIS_rate(Real T, Real units );
+// Calculation of ceHeII.
+//  Cooling collisional excitation HeII
+__host__ __device__ Real cool_ceHeII_rate(Real T, Real units);
 
-//Calculation of ciHI.
-// Cooling collisional ionization HI
-__host__ __device__ Real cool_ciHI_rate(Real T, Real units );
+// Calculation of ciHeIS.
+//  Cooling collisional ionization HeIS
+__host__ __device__ Real cool_ciHeIS_rate(Real T, Real units);
 
+// Calculation of ciHI.
+//  Cooling collisional ionization HI
+__host__ __device__ Real cool_ciHI_rate(Real T, Real units);
 
-//Calculation of ciHeI.
-// Cooling collisional ionization HeI
-__host__ __device__ Real cool_ciHeI_rate(Real T, Real units );
+// Calculation of ciHeI.
+//  Cooling collisional ionization HeI
+__host__ __device__ Real cool_ciHeI_rate(Real T, Real units);
 
-//Calculation of ciHeII.
-// Cooling collisional ionization HeII
-__host__ __device__ Real cool_ciHeII_rate(Real T, Real units );
+// Calculation of ciHeII.
+//  Cooling collisional ionization HeII
+__host__ __device__ Real cool_ciHeII_rate(Real T, Real units);
 
-
-//Calculation of reHII.
-// Cooling recombination HII
-__host__ __device__ Real cool_reHII_rate(Real T, Real units, bool use_case_B );
+// Calculation of reHII.
+//  Cooling recombination HII
+__host__ __device__ Real cool_reHII_rate(Real T, Real units, bool use_case_B);
 // Cooling recombination HII Case A
-__host__ __device__ Real cool_reHII_rate_case_A(Real T, Real units );
+__host__ __device__ Real cool_reHII_rate_case_A(Real T, Real units);
 // Cooling recombination HII Case B
-__host__ __device__ Real cool_reHII_rate_case_B(Real T, Real units );
+__host__ __device__ Real cool_reHII_rate_case_B(Real T, Real units);
 
-//Calculation of reHII.
-// Cooling recombination HeII
-__host__ __device__ Real cool_reHeII1_rate(Real T, Real units, bool use_case_B );
+// Calculation of reHII.
+//  Cooling recombination HeII
+__host__ __device__ Real cool_reHeII1_rate(Real T, Real units, bool use_case_B);
 // Cooling recombination HeII Case A
-__host__ __device__ Real cool_reHeII1_rate_case_A(Real T, Real units );
+__host__ __device__ Real cool_reHeII1_rate_case_A(Real T, Real units);
 // Cooling recombination HeII Case B
-__host__ __device__ Real cool_reHeII1_rate_case_B(Real T, Real units );
+__host__ __device__ Real cool_reHeII1_rate_case_B(Real T, Real units);
 
-//Calculation of reHII2.
-// Cooling recombination HeII Dielectronic
-__host__ __device__ Real cool_reHeII2_rate(Real T, Real units );
+// Calculation of reHII2.
+//  Cooling recombination HeII Dielectronic
+__host__ __device__ Real cool_reHeII2_rate(Real T, Real units);
 
-//Calculation of reHIII.
-// Cooling recombination HeIII
-__host__ __device__ Real cool_reHeIII_rate(Real T, Real units, bool use_case_B );
+// Calculation of reHIII.
+//  Cooling recombination HeIII
+__host__ __device__ Real cool_reHeIII_rate(Real T, Real units, bool use_case_B);
 // Cooling recombination HeIII Case A
-__host__ __device__ Real cool_reHeIII_rate_case_A(Real T, Real units );
+__host__ __device__ Real cool_reHeIII_rate_case_A(Real T, Real units);
 // Cooling recombination HeIII Case B
-__host__ __device__ Real cool_reHeIII_rate_case_B(Real T, Real units );
+__host__ __device__ Real cool_reHeIII_rate_case_B(Real T, Real units);
 
-//Calculation of brem.
-// Cooling Bremsstrahlung
-__host__ __device__ Real cool_brem_rate(Real T, Real units );
+// Calculation of brem.
+//  Cooling Bremsstrahlung
+__host__ __device__ Real cool_brem_rate(Real T, Real units);
 
-//Calculation of comp.
-// Compton cooling
+// Calculation of comp.
+//  Compton cooling
 __host__ __device__ Real comp_rate(Real n_e, Real T, Real zr, Real units);
-__host__ __device__ Real cool_compton_rate( Real T, Real units );
-
+__host__ __device__ Real cool_compton_rate(Real T, Real units);
 
 // X-ray compton heating
-__host__ __device__ Real xray_heat_rate( Real n_e, Real T,  Real Redshift, Real units );
-
-
-// Colisional excitation of neutral hydrogen (HI) and singly ionized helium (HeII)
-Real __device__ Collisional_Ionization_Rate_e_HI_Abel97( Real temp );
-
-Real __device__ Recombination_Rate_HII_Abel97( Real temp );
-
-Real __device__ Collisional_Ionization_Rate_e_HeI_Abel97( Real temp );
-  
-Real __device__ Collisional_Ionization_Rate_e_HeII_Abel97( Real temp );
+__host__ __device__ Real xray_heat_rate(Real n_e, Real T, Real Redshift, Real units);
 
-Real __device__ Collisional_Ionization_Rate_HI_HI_Lenzuni91( Real temp );
+// Colisional excitation of neutral hydrogen (HI) and singly ionized helium
+// (HeII)
+Real __device__ Collisional_Ionization_Rate_e_HI_Abel97(Real temp);
 
-Real __device__ Collisional_Ionization_Rate_HII_HI_Lenzuni91( Real temp );
+Real __device__ Recombination_Rate_HII_Abel97(Real temp);
 
-Real __device__ Collisional_Ionization_Rate_HeI_HI_Lenzuni91( Real temp );
+Real __device__ Collisional_Ionization_Rate_e_HeI_Abel97(Real temp);
 
-Real __device__ Recombination_Rate_HII_Hui97( Real temp );
+Real __device__ Collisional_Ionization_Rate_e_HeII_Abel97(Real temp);
 
-Real __device__ Recombination_Rate_HeII_Hui97( Real temp );
+Real __device__ Collisional_Ionization_Rate_HI_HI_Lenzuni91(Real temp);
 
-Real __device__ Recombination_Rate_HeIII_Hui97( Real temp );
+Real __device__ Collisional_Ionization_Rate_HII_HI_Lenzuni91(Real temp);
 
+Real __device__ Collisional_Ionization_Rate_HeI_HI_Lenzuni91(Real temp);
 
-Real __device__ Cooling_Rate_Recombination_HII_Hui97( Real n_e,  Real n_HII, Real temp );
+Real __device__ Recombination_Rate_HII_Hui97(Real temp);
 
-Real __device__ Cooling_Rate_Recombination_HeII_Hui97( Real n_e, Real n_HII, Real temp );
+Real __device__ Recombination_Rate_HeII_Hui97(Real temp);
 
-Real __device__ Cooling_Rate_Recombination_HeIII_Hui97( Real n_e, Real n_HII, Real temp );
+Real __device__ Recombination_Rate_HeIII_Hui97(Real temp);
 
-Real __device__ Recombination_Rate_dielectronic_HeII_Hui97( Real temp );
+Real __device__ Cooling_Rate_Recombination_HII_Hui97(Real n_e, Real n_HII, Real temp);
 
-Real __device__ Cooling_Rate_Recombination_dielectronic_HeII_Hui97( Real n_e, Real n_HeII, Real temp );
+Real __device__ Cooling_Rate_Recombination_HeII_Hui97(Real n_e, Real n_HII, Real temp);
 
-Real __device__ Collisional_Ionization_Rate_e_HI_Hui97( Real temp );
+Real __device__ Cooling_Rate_Recombination_HeIII_Hui97(Real n_e, Real n_HII, Real temp);
 
-Real __device__ Cooling_Rate_Collisional_Excitation_e_HI_Hui97( Real n_e, Real n_HI, Real temp );
+Real __device__ Recombination_Rate_dielectronic_HeII_Hui97(Real temp);
 
-Real __device__ Cooling_Rate_Collisional_Excitation_e_HeII_Hui97( Real n_e, Real n_HeII,  Real temp );
+Real __device__ Cooling_Rate_Recombination_dielectronic_HeII_Hui97(Real n_e, Real n_HeII, Real temp);
 
-// Compton cooling off the CMB 
-Real __device__ Cooling_Rate_Compton_CMB_MillesOstriker01( Real n_e, Real temp, Real z );
+Real __device__ Collisional_Ionization_Rate_e_HI_Hui97(Real temp);
 
-// Real __device__ Cooling_Rate_Compton_CMB_Peebles93( Real n_e, Real temp, Real current_z, cosmo );
+Real __device__ Cooling_Rate_Collisional_Excitation_e_HI_Hui97(Real n_e, Real n_HI, Real temp);
 
+Real __device__ Cooling_Rate_Collisional_Excitation_e_HeII_Hui97(Real n_e, Real n_HeII, Real temp);
 
+// Compton cooling off the CMB
+Real __device__ Cooling_Rate_Compton_CMB_MillesOstriker01(Real n_e, Real temp, Real z);
 
+// Real __device__ Cooling_Rate_Compton_CMB_Peebles93( Real n_e, Real temp, Real
+// current_z, cosmo );
 
 #endif
\ No newline at end of file
diff --git a/src/chemistry_gpu/rates_Katz95.cuh b/src/chemistry_gpu/rates_Katz95.cuh
index 4942f1558..18c5e54c2 100644
--- a/src/chemistry_gpu/rates_Katz95.cuh
+++ b/src/chemistry_gpu/rates_Katz95.cuh
@@ -1,58 +1,53 @@
 #ifdef CHEMISTRY_GPU
 
-#include "chemistry_gpu.h"
-#include"../global/global_cuda.h"
+  #include "../global/global_cuda.h"
+  #include "chemistry_gpu.h"
 
+// Colisional excitation of neutral hydrogen (HI) and singly ionized helium
+// (HeII)
 
-// Colisional excitation of neutral hydrogen (HI) and singly ionized helium (HeII)
-
-Real __device__ Cooling_Rate_Collisional_Excitation_e_HI_Katz95( Real n_e, Real n_HI, Real temp );
-
-Real __device__ Cooling_Rate_Collisional_Excitation_e_HeII_Katz95( Real n_e, Real n_HeII, Real temp );
-
+Real __device__ Cooling_Rate_Collisional_Excitation_e_HI_Katz95(Real n_e, Real n_HI, Real temp);
 
+Real __device__ Cooling_Rate_Collisional_Excitation_e_HeII_Katz95(Real n_e, Real n_HeII, Real temp);
 
 // Colisional ionization  of HI, HeI and HeII
-Real __device__  Cooling_Rate_Collisional_Ionization_e_HI_Katz95( Real n_e, Real n_HI, Real temp );
+Real __device__ Cooling_Rate_Collisional_Ionization_e_HI_Katz95(Real n_e, Real n_HI, Real temp);
 
+Real __device__ Cooling_Rate_Collisional_Ionization_e_HeI_Katz95(Real n_e, Real n_HeI, Real temp);
 
-Real __device__  Cooling_Rate_Collisional_Ionization_e_HeI_Katz95( Real n_e, Real n_HeI, Real temp );
+Real __device__ Cooling_Rate_Collisional_Ionization_e_HeII_Katz95(Real n_e, Real n_HeII, Real temp);
 
-Real __device__  Cooling_Rate_Collisional_Ionization_e_HeII_Katz95( Real n_e, Real n_HeII, Real temp );
+Real __device__ Collisional_Ionization_Rate_e_HI_Katz95(Real temp);
 
-Real __device__  Collisional_Ionization_Rate_e_HI_Katz95( Real temp );
+Real __device__ Collisional_Ionization_Rate_e_HeI_Katz95(Real temp);
 
-Real __device__  Collisional_Ionization_Rate_e_HeI_Katz95( Real temp );
-
-Real __device__  Collisional_Ionization_Rate_e_HeII_Katz95( Real temp );
+Real __device__ Collisional_Ionization_Rate_e_HeII_Katz95(Real temp);
 
 // Standard Recombination of HII, HeII and HeIII
 
-Real __device__  Cooling_Rate_Recombination_HII_Katz95( Real n_e, Real n_HII, Real temp );
+Real __device__ Cooling_Rate_Recombination_HII_Katz95(Real n_e, Real n_HII, Real temp);
 
-Real __device__  Cooling_Rate_Recombination_HeII_Katz95( Real n_e, Real n_HeII, Real temp );
+Real __device__ Cooling_Rate_Recombination_HeII_Katz95(Real n_e, Real n_HeII, Real temp);
 
-Real __device__  Cooling_Rate_Recombination_HeIII_Katz95( Real n_e, Real n_HeIII, Real temp );
+Real __device__ Cooling_Rate_Recombination_HeIII_Katz95(Real n_e, Real n_HeIII, Real temp);
 
-Real __device__  Recombination_Rate_HII_Katz95( Real temp );
+Real __device__ Recombination_Rate_HII_Katz95(Real temp);
 
-Real __device__  Recombination_Rate_HeII_Katz95( Real temp );
+Real __device__ Recombination_Rate_HeII_Katz95(Real temp);
 
-Real __device__  Recombination_Rate_HeIII_Katz95( Real temp );
+Real __device__ Recombination_Rate_HeIII_Katz95(Real temp);
 
 // Dielectronic recombination of HeII
-Real __device__  Cooling_Rate_Recombination_dielectronic_HeII_Katz95( Real n_e, Real n_HeII, Real temp );
-
-Real __device__  Recombination_Rate_dielectronic_HeII_Katz95( Real temp );
-
-// Free-Free emission (Bremsstrahlung) 
-Real __device__  gaunt_factor( Real log10_T );
+Real __device__ Cooling_Rate_Recombination_dielectronic_HeII_Katz95(Real n_e, Real n_HeII, Real temp);
 
-Real __device__  Cooling_Rate_Bremsstrahlung_Katz95( Real n_e, Real n_HII, Real n_HeII, Real n_HeIII, Real temp );
+Real __device__ Recombination_Rate_dielectronic_HeII_Katz95(Real temp);
 
+// Free-Free emission (Bremsstrahlung)
+Real __device__ gaunt_factor(Real log10_T);
 
-// Compton cooling off the CMB 
-Real __device__  Cooling_Rate_Compton_CMB_Katz95( Real n_e, Real temp, Real z );
+Real __device__ Cooling_Rate_Bremsstrahlung_Katz95(Real n_e, Real n_HII, Real n_HeII, Real n_HeIII, Real temp);
 
+// Compton cooling off the CMB
+Real __device__ Cooling_Rate_Compton_CMB_Katz95(Real n_e, Real temp, Real z);
 
 #endif
\ No newline at end of file
diff --git a/src/cooling/cooling_cuda.cu b/src/cooling/cooling_cuda.cu
index f1d0a8ee5..5cbebbb72 100644
--- a/src/cooling/cooling_cuda.cu
+++ b/src/cooling/cooling_cuda.cu
@@ -1,199 +1,195 @@
 /*! \file cooling_cuda.cu
  *  \brief Functions to calculate cooling rate for a given rho, P, dt. */
 
-#ifdef CUDA
 #ifdef COOLING_GPU
 
-#include "../utils/gpu.hpp"
-#include <math.h>
-#include "../global/global.h"
-#include "../global/global_cuda.h"
-#include "../cooling/cooling_cuda.h"
+  #include <math.h>
 
-#ifdef CLOUDY_COOL
-#include "../cooling/texture_utilities.h"
-#endif
+  #include "../cooling/cooling_cuda.h"
+  #include "../global/global.h"
+  #include "../global/global_cuda.h"
+  #include "../utils/gpu.hpp"
+
+  #ifdef CLOUDY_COOL
+    #include "../cooling/texture_utilities.h"
+  #endif
 
 cudaTextureObject_t coolTexObj = 0;
 cudaTextureObject_t heatTexObj = 0;
 
-void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma){
-
-  int n_cells = nx*ny*nz;
-  int ngrid = (n_cells + TPB - 1) / TPB;
+void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma)
+{
+  int n_cells = nx * ny * nz;
+  int ngrid   = (n_cells + TPB - 1) / TPB;
   dim3 dim1dGrid(ngrid, 1, 1);
   dim3 dim1dBlock(TPB, 1, 1);
-  hipLaunchKernelGGL(cooling_kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, gama, coolTexObj, heatTexObj);
-  CudaCheckError();  
+  hipLaunchKernelGGL(cooling_kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt,
+                     gama, coolTexObj, heatTexObj);
+  GPU_Error_Check();
 }
 
-
-/*! \fn void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj)
- *  \brief When passed an array of conserved variables and a timestep, adjust the value
-           of the total energy for each cell according to the specified cooling function. */
-__global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj)
+/*! \fn void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int
+ n_ghost, int n_fields, Real dt, Real gamma, cudaTextureObject_t coolTexObj,
+ cudaTextureObject_t heatTexObj)
+ *  \brief When passed an array of conserved variables and a timestep, adjust
+ the value of the total energy for each cell according to the specified cooling
+ function. */
+__global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt,
+                               Real gamma, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj)
 {
-
-  int n_cells = nx*ny*nz;
+  int n_cells = nx * ny * nz;
   int is, ie, js, je, ks, ke;
   is = n_ghost;
-  ie = nx-n_ghost;
+  ie = nx - n_ghost;
   if (ny == 1) {
     js = 0;
     je = 1;
-  }
-  else {
+  } else {
     js = n_ghost;
-    je = ny-n_ghost;
+    je = ny - n_ghost;
   }
   if (nz == 1) {
     ks = 0;
     ke = 1;
-  }
-  else {
+  } else {
     ks = n_ghost;
-    ke = nz-n_ghost;
+    ke = nz - n_ghost;
   }
 
   Real d, E;
   Real n, T, T_init;
   Real del_T, dt_sub;
-  Real mu; // mean molecular weight
-  Real cool; //cooling rate per volume, erg/s/cm^3
-  //#ifndef DE
+  Real mu;    // mean molecular weight
+  Real cool;  // cooling rate per volume, erg/s/cm^3
+  // #ifndef DE
   Real vx, vy, vz, p;
-  //#endif
+  // #endif
   #ifdef DE
   Real ge;
   #endif
 
   mu = 0.6;
-  //mu = 1.27;
+  // mu = 1.27;
 
   // get a global thread ID
-  int blockId = blockIdx.x + blockIdx.y*gridDim.x;
-  int id = threadIdx.x + blockId * blockDim.x;
-  int zid = id / (nx*ny);
-  int yid = (id - zid*nx*ny) / nx;
-  int xid = id - zid*nx*ny - yid*nx;
-
+  int blockId = blockIdx.x + blockIdx.y * gridDim.x;
+  int id      = threadIdx.x + blockId * blockDim.x;
+  int zid     = id / (nx * ny);
+  int yid     = (id - zid * nx * ny) / nx;
+  int xid     = id - zid * nx * ny - yid * nx;
 
   // only threads corresponding to real cells do the calculation
   if (xid >= is && xid < ie && yid >= js && yid < je && zid >= ks && zid < ke) {
-
     // load values of density and pressure
-    d  =  dev_conserved[            id];
-    E  =  dev_conserved[4*n_cells + id];
+    d = dev_conserved[id];
+    E = dev_conserved[4 * n_cells + id];
     // don't apply cooling if this thread crashed
-    if (E < 0.0 || E != E) return;
-    //#ifndef DE
-    vx =  dev_conserved[1*n_cells + id] / d;
-    vy =  dev_conserved[2*n_cells + id] / d;
-    vz =  dev_conserved[3*n_cells + id] / d;
-    p  = (E - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0);
-    p  = fmax(p, (Real) TINY_NUMBER);
-    //#endif
-    #ifdef DE
-    ge = dev_conserved[(n_fields-1)*n_cells + id] / d;
-    ge = fmax(ge, (Real) TINY_NUMBER);
-    #endif
+    if (E < 0.0 || E != E) {
+      return;
+    }
+    // #ifndef DE
+    vx = dev_conserved[1 * n_cells + id] / d;
+    vy = dev_conserved[2 * n_cells + id] / d;
+    vz = dev_conserved[3 * n_cells + id] / d;
+    p  = (E - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0);
+    p  = fmax(p, (Real)TINY_NUMBER);
+  // #endif
+  #ifdef DE
+    ge = dev_conserved[(n_fields - 1) * n_cells + id] / d;
+    ge = fmax(ge, (Real)TINY_NUMBER);
+  #endif
 
     // calculate the number density of the gas (in cgs)
-    n = d*DENSITY_UNIT / (mu * MP);
+    n = d * DENSITY_UNIT / (mu * MP);
 
     // calculate the temperature of the gas
-    T_init = p*PRESSURE_UNIT/ (n*KB);
-    #ifdef DE
-    T_init = d*ge*(gamma-1.0)*PRESSURE_UNIT/(n*KB);
-    #endif
+    T_init = p * PRESSURE_UNIT / (n * KB);
+  #ifdef DE
+    T_init = d * ge * (gamma - 1.0) * PRESSURE_UNIT / (n * KB);
+  #endif
 
     // calculate cooling rate per volume
     T = T_init;
-    // call the cooling function
-    #ifdef CLOUDY_COOL
+  // call the cooling function
+  #ifdef CLOUDY_COOL
     cool = Cloudy_cool(n, T, coolTexObj, heatTexObj);
-    #else
+  #else
     cool = CIE_cool(n, T);
-    #endif
+  #endif
 
     // calculate change in temperature given dt
-    del_T = cool*dt*TIME_UNIT*(gamma-1.0)/(n*KB);
+    del_T = cool * dt * TIME_UNIT * (gamma - 1.0) / (n * KB);
 
     // limit change in temperature to 1%
-    while (del_T/T > 0.01) {
+    while (del_T / T > 0.01) {
       // what dt gives del_T = 0.01*T?
-      dt_sub = 0.01*T*n*KB/(cool*TIME_UNIT*(gamma-1.0));
+      dt_sub = 0.01 * T * n * KB / (cool * TIME_UNIT * (gamma - 1.0));
       // apply that dt
-      T -= cool*dt_sub*TIME_UNIT*(gamma-1.0)/(n*KB);
+      T -= cool * dt_sub * TIME_UNIT * (gamma - 1.0) / (n * KB);
       // how much time is left from the original timestep?
       dt -= dt_sub;
-      // calculate cooling again
-      #ifdef CLOUDY_COOL
+  // calculate cooling again
+  #ifdef CLOUDY_COOL
       cool = Cloudy_cool(n, T, coolTexObj, heatTexObj);
-      #else
+  #else
       cool = CIE_cool(n, T);
-      #endif
+  #endif
       // calculate new change in temperature
-      del_T = cool*dt*TIME_UNIT*(gamma-1.0)/(n*KB);
+      del_T = cool * dt * TIME_UNIT * (gamma - 1.0) / (n * KB);
     }
 
     // calculate final temperature
     T -= del_T;
 
     // adjust value of energy based on total change in temperature
-    del_T = T_init - T; // total change in T
-    E -= n*KB*del_T / ((gamma-1.0)*ENERGY_UNIT);
-    #ifdef DE
-    ge -= KB*del_T / (mu*MP*(gamma-1.0)*SP_ENERGY_UNIT);
-    #endif
-    // calculate cooling rate for new T
-    #ifdef CLOUDY_COOL
+    del_T = T_init - T;  // total change in T
+    E -= n * KB * del_T / ((gamma - 1.0) * ENERGY_UNIT);
+  #ifdef DE
+    ge -= KB * del_T / (mu * MP * (gamma - 1.0) * SP_ENERGY_UNIT);
+  #endif
+
+  // calculate cooling rate for new T
+  #ifdef CLOUDY_COOL
     cool = Cloudy_cool(n, T, coolTexObj, heatTexObj);
-    #else
+  #else
     cool = CIE_cool(n, T);
-    //printf("%d %d %d %e %e %e\n", xid, yid, zid, n, T, cool);
-    #endif
+  // printf("%d %d %d %e %e %e\n", xid, yid, zid, n, T, cool);
+  #endif
 
     // and send back from kernel
-    dev_conserved[4*n_cells + id] = E;
-    #ifdef DE
-    dev_conserved[(n_fields-1)*n_cells + id] = d*ge;
-    #endif
-
+    dev_conserved[4 * n_cells + id] = E;
+  #ifdef DE
+    dev_conserved[(n_fields - 1) * n_cells + id] = d * ge;
+  #endif
   }
-
 }
 
-
 /* \fn __device__ Real test_cool(Real n, Real T)
  * \brief Cooling function from Creasey 2011. */
 __device__ Real test_cool(int tid, Real n, Real T)
 {
   Real T0, T1, lambda, cool;
-  T0 = 10000.0;
-  T1 = 20*T0;
+  T0   = 10000.0;
+  T1   = 20 * T0;
   cool = 0.0;
-  //lambda = 5.0e-24; //cooling coefficient, 5e-24 erg cm^3 s^-1
-  lambda = 5.0e-20; //cooling coefficient, 5e-24 erg cm^3 s^-1
+  // lambda = 5.0e-24; //cooling coefficient, 5e-24 erg cm^3 s^-1
+  lambda = 5.0e-20;  // cooling coefficient, 5e-24 erg cm^3 s^-1
 
   // constant cooling rate
-  //cool = n*n*lambda;
+  // cool = n*n*lambda;
 
   // Creasey cooling function
-  if (T >= T0 && T <= 0.5*(T1+T0)) {
-    cool = n*n*lambda*(T - T0) / T0;
+  if (T >= T0 && T <= 0.5 * (T1 + T0)) {
+    cool = n * n * lambda * (T - T0) / T0;
   }
-  if (T >= 0.5*(T1+T0) && T <= T1) {
-    cool = n*n*lambda*(T1 - T) / T0;
+  if (T >= 0.5 * (T1 + T0) && T <= T1) {
+    cool = n * n * lambda * (T1 - T) / T0;
   }
 
-
-  //printf("%d %f %f\n", tid, T, cool);
+  // printf("%d %f %f\n", tid, T, cool);
   return cool;
-
 }
 
-
 /* \fn __device__ Real primordial_cool(Real n, Real T)
  * \brief Primordial hydrogen/helium cooling curve
           derived according to Katz et al. 1996. */
@@ -210,74 +206,76 @@ __device__ Real primordial_cool(Real n, Real T)
   // set flag to 1 for photoionization & heating
   heat_flag = 0;
 
-  //Real X = 0.76; //hydrogen abundance by mass
-  Y = 0.24; //helium abundance by mass
-  y = Y/(4 - 4*Y);
+  // Real X = 0.76; //hydrogen abundance by mass
+  Y = 0.24;  // helium abundance by mass
+  y = Y / (4 - 4 * Y);
 
   // set the hydrogen number density
   n_h = n;
 
   // calculate the recombination and collisional ionization rates
   // (Table 2 from Katz 1996)
-  alpha_hp   = (8.4e-11) * (1.0/sqrt(T)) * pow((T/1e3),(-0.2)) * (1.0 / (1.0 + pow((T/1e6),(0.7))));
-  alpha_hep  = (1.5e-10) * (pow(T,(-0.6353)));
-  alpha_d    = (1.9e-3)  * (pow(T,(-1.5))) * exp(-470000.0/T) * (1.0 + 0.3*exp(-94000.0/T));
-  alpha_hepp = (3.36e-10)* (1.0/sqrt(T)) * pow((T/1e3),(-0.2)) * (1.0 / (1.0 + pow((T/1e6),(0.7))));
-  gamma_eh0  = (5.85e-11)* sqrt(T) * exp(-157809.1/T) * (1.0 / (1.0 + sqrt(T/1e5)));
-  gamma_ehe0 = (2.38e-11)* sqrt(T) * exp(-285335.4/T) * (1.0 / (1.0 + sqrt(T/1e5)));
-  gamma_ehep = (5.68e-12)* sqrt(T) * exp(-631515.0/T) * (1.0 / (1.0 + sqrt(T/1e5)));
+  alpha_hp   = (8.4e-11) * (1.0 / sqrt(T)) * pow((T / 1e3), (-0.2)) * (1.0 / (1.0 + pow((T / 1e6), (0.7))));
+  alpha_hep  = (1.5e-10) * (pow(T, (-0.6353)));
+  alpha_d    = (1.9e-3) * (pow(T, (-1.5))) * exp(-470000.0 / T) * (1.0 + 0.3 * exp(-94000.0 / T));
+  alpha_hepp = (3.36e-10) * (1.0 / sqrt(T)) * pow((T / 1e3), (-0.2)) * (1.0 / (1.0 + pow((T / 1e6), (0.7))));
+  gamma_eh0  = (5.85e-11) * sqrt(T) * exp(-157809.1 / T) * (1.0 / (1.0 + sqrt(T / 1e5)));
+  gamma_ehe0 = (2.38e-11) * sqrt(T) * exp(-285335.4 / T) * (1.0 / (1.0 + sqrt(T / 1e5)));
+  gamma_ehep = (5.68e-12) * sqrt(T) * exp(-631515.0 / T) * (1.0 / (1.0 + sqrt(T / 1e5)));
   // externally evaluated integrals for photoionization rates
   // assumed J(nu) = 10^-22 (nu_L/nu)
-  gamma_lh0 = 3.19851e-13;
+  gamma_lh0  = 3.19851e-13;
   gamma_lhe0 = 3.13029e-13;
   gamma_lhep = 2.00541e-14;
   // externally evaluated integrals for heating rates
-  e_h0 = 2.4796e-24;
+  e_h0  = 2.4796e-24;
   e_he0 = 6.86167e-24;
   e_hep = 6.21868e-25;
 
-
   // assuming no photoionization, solve equations for number density of
   // each species
-  n_e = n_h; //as a first guess, use the hydrogen number density
+  n_e    = n_h;  // as a first guess, use the hydrogen number density
   n_iter = 20;
-  diff = 1.0;
-  tol = 1.0e-6;
+  diff   = 1.0;
+  tol    = 1.0e-6;
   if (heat_flag) {
-    for (int i=0; i<n_iter; i++) {
+    for (int i = 0; i < n_iter; i++) {
       n_e_old = n_e;
-      n_h0   = n_h*alpha_hp / (alpha_hp + gamma_eh0 + gamma_lh0/n_e);
-      n_hp   = n_h - n_h0;
-      n_hep  = y*n_h / (1.0 + (alpha_hep + alpha_d)/(gamma_ehe0 + gamma_lhe0/n_e) + (gamma_ehep + gamma_lhep/n_e)/alpha_hepp );
-      n_he0  = n_hep*(alpha_hep + alpha_d) / (gamma_ehe0 + gamma_lhe0/n_e);
-      n_hepp = n_hep*(gamma_ehep + gamma_lhep/n_e)/alpha_hepp;
-      n_e    = n_hp + n_hep + 2*n_hepp;
-      diff = fabs(n_e_old - n_e);
-      if (diff < tol) break;
+      n_h0    = n_h * alpha_hp / (alpha_hp + gamma_eh0 + gamma_lh0 / n_e);
+      n_hp    = n_h - n_h0;
+      n_hep   = y * n_h /
+              (1.0 + (alpha_hep + alpha_d) / (gamma_ehe0 + gamma_lhe0 / n_e) +
+               (gamma_ehep + gamma_lhep / n_e) / alpha_hepp);
+      n_he0  = n_hep * (alpha_hep + alpha_d) / (gamma_ehe0 + gamma_lhe0 / n_e);
+      n_hepp = n_hep * (gamma_ehep + gamma_lhep / n_e) / alpha_hepp;
+      n_e    = n_hp + n_hep + 2 * n_hepp;
+      diff   = fabs(n_e_old - n_e);
+      if (diff < tol) {
+        break;
+      }
     }
-  }
-  else {
-    n_h0   = n_h*alpha_hp / (alpha_hp + gamma_eh0);
+  } else {
+    n_h0   = n_h * alpha_hp / (alpha_hp + gamma_eh0);
     n_hp   = n_h - n_h0;
-    n_hep  = y*n_h / (1.0 + (alpha_hep + alpha_d)/(gamma_ehe0) + (gamma_ehep)/alpha_hepp );
-    n_he0  = n_hep*(alpha_hep + alpha_d) / (gamma_ehe0);
-    n_hepp = n_hep*(gamma_ehep)/alpha_hepp;
-    n_e    = n_hp + n_hep + 2*n_hepp;
+    n_hep  = y * n_h / (1.0 + (alpha_hep + alpha_d) / (gamma_ehe0) + (gamma_ehep) / alpha_hepp);
+    n_he0  = n_hep * (alpha_hep + alpha_d) / (gamma_ehe0);
+    n_hepp = n_hep * (gamma_ehep) / alpha_hepp;
+    n_e    = n_hp + n_hep + 2 * n_hepp;
   }
 
   // using number densities, calculate cooling rates for
   // various processes (Table 1 from Katz 1996)
-  le_h0 = (7.50e-19) * exp(-118348.0/T) * (1.0 / (1.0 + sqrt(T/1e5))) * n_e * n_h0;
-  le_hep = (5.54e-17) * pow(T,(-0.397)) * exp(-473638.0/T) * (1.0 / (1.0 + sqrt(T/1e5))) * n_e * n_hep;
-  li_h0 = (1.27e-21) * sqrt(T) * exp(-157809.1/T) * (1.0 / (1.0 + sqrt(T/1e5))) * n_e * n_h0;
-  li_he0 = (9.38e-22) * sqrt(T) * exp(-285335.4/T) * (1.0 / (1.0 + sqrt(T/1e5))) * n_e * n_he0;
-  li_hep = (4.95e-22) * sqrt(T) * exp(-631515.0/T) * (1.0 / (1.0 + sqrt(T/1e5))) * n_e * n_hep;
-  lr_hp = (8.70e-27) * sqrt(T) * pow((T/1e3),(-0.2)) * (1.0 / (1.0 + pow((T/1e6),(0.7)))) * n_e * n_hp;
-  lr_hep = (1.55e-26) * pow(T,(0.3647)) * n_e * n_hep;
-  lr_hepp = (3.48e-26) * sqrt(T) * pow((T/1e3),(-0.2)) * (1.0 / (1.0 + pow((T/1e6),(0.7)))) * n_e * n_hepp;
-  ld_hep = (1.24e-13) * pow(T,(-1.5)) * exp(-470000.0/T) * (1.0 + 0.3*exp(-94000.0/T)) * n_e * n_hep;
-  g_ff = 1.1 + 0.34*exp(-(5.5-log(T))*(5.5-log(T))/3.0); // Gaunt factor
-  l_ff = (1.42e-27) * g_ff * sqrt(T) * (n_hp + n_hep + 4*n_hepp) * n_e;
+  le_h0   = (7.50e-19) * exp(-118348.0 / T) * (1.0 / (1.0 + sqrt(T / 1e5))) * n_e * n_h0;
+  le_hep  = (5.54e-17) * pow(T, (-0.397)) * exp(-473638.0 / T) * (1.0 / (1.0 + sqrt(T / 1e5))) * n_e * n_hep;
+  li_h0   = (1.27e-21) * sqrt(T) * exp(-157809.1 / T) * (1.0 / (1.0 + sqrt(T / 1e5))) * n_e * n_h0;
+  li_he0  = (9.38e-22) * sqrt(T) * exp(-285335.4 / T) * (1.0 / (1.0 + sqrt(T / 1e5))) * n_e * n_he0;
+  li_hep  = (4.95e-22) * sqrt(T) * exp(-631515.0 / T) * (1.0 / (1.0 + sqrt(T / 1e5))) * n_e * n_hep;
+  lr_hp   = (8.70e-27) * sqrt(T) * pow((T / 1e3), (-0.2)) * (1.0 / (1.0 + pow((T / 1e6), (0.7)))) * n_e * n_hp;
+  lr_hep  = (1.55e-26) * pow(T, (0.3647)) * n_e * n_hep;
+  lr_hepp = (3.48e-26) * sqrt(T) * pow((T / 1e3), (-0.2)) * (1.0 / (1.0 + pow((T / 1e6), (0.7)))) * n_e * n_hepp;
+  ld_hep  = (1.24e-13) * pow(T, (-1.5)) * exp(-470000.0 / T) * (1.0 + 0.3 * exp(-94000.0 / T)) * n_e * n_hep;
+  g_ff    = 1.1 + 0.34 * exp(-(5.5 - log(T)) * (5.5 - log(T)) / 3.0);  // Gaunt factor
+  l_ff    = (1.42e-27) * g_ff * sqrt(T) * (n_hp + n_hep + 4 * n_hepp) * n_e;
 
   // calculate total cooling rate (erg s^-1 cm^-3)
   cool = le_h0 + le_hep + li_h0 + li_he0 + li_hep + lr_hp + lr_hep + lr_hepp + ld_hep + l_ff;
@@ -285,84 +283,75 @@ __device__ Real primordial_cool(Real n, Real T)
   // calculate total photoionization heating rate
   H = 0.0;
   if (heat_flag) {
-    H = n_h0*e_h0 + n_he0*e_he0 + n_hep*e_hep;
+    H = n_h0 * e_h0 + n_he0 * e_he0 + n_hep * e_hep;
   }
 
   cool -= H;
 
   return cool;
-
 }
 
-
 /* \fn __device__ Real CIE_cool(Real n, Real T)
  * \brief Analytic fit to a solar metallicity CIE cooling curve
           calculated using Cloudy. */
 __device__ Real CIE_cool(Real n, Real T)
 {
-  Real lambda = 0.0; //cooling rate, erg s^-1 cm^3
-  Real cool = 0.0; //cooling per unit volume, erg /s / cm^3
+  Real lambda = 0.0;  // cooling rate, erg s^-1 cm^3
+  Real cool   = 0.0;  // cooling per unit volume, erg /s / cm^3
 
   // fit to CIE cooling function
   if (log10(T) < 4.0) {
     lambda = 0.0;
-  }
-  else if (log10(T) >= 4.0 && log10(T) < 5.9) {
+  } else if (log10(T) >= 4.0 && log10(T) < 5.9) {
     lambda = pow(10.0, (-1.3 * (log10(T) - 5.25) * (log10(T) - 5.25) - 21.25));
-  }
-  else if (log10(T) >= 5.9 && log10(T) < 7.4) {
+  } else if (log10(T) >= 5.9 && log10(T) < 7.4) {
     lambda = pow(10.0, (0.7 * (log10(T) - 7.1) * (log10(T) - 7.1) - 22.8));
-  }
-  else {
-    lambda = pow(10.0, (0.45*log10(T) - 26.065));
+  } else {
+    lambda = pow(10.0, (0.45 * log10(T) - 26.065));
   }
 
   // cooling rate per unit volume
-  cool = n*n*lambda;
+  cool = n * n * lambda;
 
   return cool;
-
 }
 
-
-#ifdef CLOUDY_COOL
-/* \fn __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj)
+  #ifdef CLOUDY_COOL
+/* \fn __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t
+ coolTexObj, cudaTextureObject_t heatTexObj)
  * \brief Uses texture mapping to interpolate Cloudy cooling/heating
           tables at z = 0 with solar metallicity and an HM05 UV background. */
 __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj)
 {
-  Real lambda = 0.0; //cooling rate, erg s^-1 cm^3
-  Real H = 0.0; //heating rate, erg s^-1 cm^3
-  Real cool = 0.0; //cooling per unit volume, erg /s / cm^3
+  Real lambda = 0.0;  // cooling rate, erg s^-1 cm^3
+  Real H      = 0.0;  // heating rate, erg s^-1 cm^3
+  Real cool   = 0.0;  // cooling per unit volume, erg /s / cm^3
   float log_n, log_T;
   log_n = log10(n);
   log_T = log10(T);
 
   // remap coordinates for texture
   // remapped = (input - TABLE_MIN_VALUE)*(1/TABLE_SPACING)
-  // remapped = (input - TABLE_MIN_VALUE)*(NUM_CELLS_PER_DECADE)  
-  log_T = (log_T - 1.0)*10;
-  log_n = (log_n + 6.0)*10;
+  // remapped = (input - TABLE_MIN_VALUE)*(NUM_CELLS_PER_DECADE)
+  log_T = (log_T - 1.0) * 10;
+  log_n = (log_n + 6.0) * 10;
+
+  // Note: although the cloudy table columns are n,T,L,H , T is the fastest
+  // variable so it is treated as "x" This is why the Texture calls are T first,
+  // then n: Bilinear_Texture(tex, log_T, log_n)
 
-  // Note: although the cloudy table columns are n,T,L,H , T is the fastest variable so it is treated as "x"
-  // This is why the Texture calls are T first, then n: Bilinear_Texture(tex, log_T, log_n) 
-  
   // don't cool below 10 K
   if (log10(T) > 1.0) {
     lambda = Bilinear_Texture(coolTexObj, log_T, log_n);
-  }
-  else lambda = 0.0;
+  } else
+    lambda = 0.0;
   H = Bilinear_Texture(heatTexObj, log_T, log_n);
 
   // cooling rate per unit volume
-  cool = n*n*(powf(10, lambda) - powf(10, H));
+  cool = n * n * (powf(10, lambda) - powf(10, H));
   // printf("DEBUG Cloudy L350: %.17e\n",cool);
   return cool;
 }
-#endif //CLOUDY_COOL
-
-
-
+  #endif  // CLOUDY_COOL
 
-#endif //COOLING_GPU
-#endif //CUDA
+#endif  // COOLING_GPU
diff --git a/src/cooling/cooling_cuda.h b/src/cooling/cooling_cuda.h
index f8d098e59..d9105fde3 100644
--- a/src/cooling/cooling_cuda.h
+++ b/src/cooling/cooling_cuda.h
@@ -1,50 +1,50 @@
 /*! \file cooling_cuda.h
  *  \brief Declarations of cooling functions. */
 
-#ifdef CUDA
 #ifdef COOLING_GPU
-#pragma once
+  #pragma once
 
-#include "../utils/gpu.hpp"
-#include <math.h>
-#include "../global/global.h"
+  #include <math.h>
+
+  #include "../global/global.h"
+  #include "../utils/gpu.hpp"
 
 extern cudaTextureObject_t coolTexObj;
 extern cudaTextureObject_t heatTexObj;
 
-/*! \fn void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma)
- *  \brief When passed an array of conserved variables and a timestep, adjust the value
-           of the total energy for each cell according to the specified cooling function. */
+/*! \fn void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int
+ n_ghost, int n_fields, Real dt, Real gamma)
+ *  \brief When passed an array of conserved variables and a timestep, adjust
+ the value of the total energy for each cell according to the specified cooling
+ function. */
 void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma);
 
-
-/*! \fn void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, Real dt, Real gamma)
- *  \brief When passed an array of conserved variables and a timestep, adjust the value
-           of the total energy for each cell according to the specified cooling function. */
-__global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj);
-
+/*! \fn void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int
+ n_ghost, Real dt, Real gamma)
+ *  \brief When passed an array of conserved variables and a timestep, adjust
+ the value of the total energy for each cell according to the specified cooling
+ function. */
+__global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt,
+                               Real gamma, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj);
 
 /* \fn __device__ Real test_cool(Real n, Real T)
  * \brief Cooling function from Creasey 2011. */
 __device__ Real test_cool(int tid, Real n, Real T);
 
-
 /* \fn __device__ Real primordial_cool(Real n, Real T)
  * \brief Primordial hydrogen/helium cooling curve
           derived according to Katz et al. 1996. */
 __device__ Real primordial_cool(Real n, Real T);
 
-
 /* \fn __device__ Real CIE_cool(Real n, Real T)
  * \brief Analytic fit to a solar metallicity CIE cooling curve
           calculated using Cloudy. */
 __device__ Real CIE_cool(Real n, Real T);
 
-
-/* \fn __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj)
+/* \fn __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t
+ coolTexObj, cudaTextureObject_t heatTexObj)
  * \brief Uses texture mapping to interpolate Cloudy cooling/heating
           tables at z = 0 with solar metallicity and an HM05 UV background. */
 __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj);
 
-#endif //COOLING_GPU
-#endif //CUDA
+#endif  // COOLING_GPU
diff --git a/src/cooling/load_cloudy_texture.cu b/src/cooling/load_cloudy_texture.cu
index 2d5758bbd..5e0f2d460 100644
--- a/src/cooling/load_cloudy_texture.cu
+++ b/src/cooling/load_cloudy_texture.cu
@@ -1,29 +1,27 @@
 /*! \file load_cloudy_texture.cu
  *  \brief Wrapper file to load cloudy cooling table as CUDA texture. */
 
-#ifdef CUDA
 #ifdef CLOUDY_COOL
 
-#include <stdio.h>
-#include <stdlib.h>
-#include "../global/global.h"
-#include "../global/global_cuda.h"
-#include "../cooling/load_cloudy_texture.h"
-#include "../cooling/cooling_cuda.h"
-#include "../cooling/texture_utilities.h"
+  #include <stdio.h>
+  #include <stdlib.h>
 
-#include "../io/io.h" // provides chprintf
+  #include "../cooling/cooling_cuda.h"
+  #include "../cooling/load_cloudy_texture.h"
+  #include "../cooling/texture_utilities.h"
+  #include "../global/global.h"
+  #include "../global/global_cuda.h"
+  #include "../io/io.h"  // provides chprintf
 
-cudaArray* cuCoolArray;
-cudaArray* cuHeatArray;
+cudaArray *cuCoolArray;
+cudaArray *cuHeatArray;
 
 void Test_Cloudy_Textures();
 void Test_Cloudy_Speed();
 
-
 /* \fn void Host_Read_Cooling_Tables(float* cooling_table, float* heating_table)
  * \brief Load the Cloudy cooling tables into host (CPU) memory. */
-void Host_Read_Cooling_Tables(float* cooling_table, float* heating_table)
+void Host_Read_Cooling_Tables(float *cooling_table, float *heating_table)
 {
   double *n_arr;
   double *T_arr;
@@ -36,56 +34,51 @@ void Host_Read_Cooling_Tables(float* cooling_table, float* heating_table)
 
   FILE *infile;
   char buffer[0x1000];
-  char * pch;
+  char *pch;
 
   // allocate arrays for temperature data
-  n_arr = (double *) malloc(nx*ny*sizeof(double));
-  T_arr = (double *) malloc(nx*ny*sizeof(double));
-  L_arr = (double *) malloc(nx*ny*sizeof(double));
-  H_arr = (double *) malloc(nx*ny*sizeof(double));
+  n_arr = (double *)malloc(nx * ny * sizeof(double));
+  T_arr = (double *)malloc(nx * ny * sizeof(double));
+  L_arr = (double *)malloc(nx * ny * sizeof(double));
+  H_arr = (double *)malloc(nx * ny * sizeof(double));
 
   // Read in cloudy cooling/heating curve (function of density and temperature)
-  i=0;
+  i = 0;
+
+  const char *cloudy_filename1 = "./cloudy_coolingcurve.txt";
+  const char *cloudy_filename2 = "src/cooling/cloudy_coolingcurve.txt";
+  const char *file_in_use;
 
-  const char* cloudy_filename1 = "./cloudy_coolingcurve.txt";
-  const char* cloudy_filename2 = "src/cooling/cloudy_coolingcurve.txt";
-  const char* file_in_use;
-  
-  infile = fopen(cloudy_filename1, "r");
+  infile      = fopen(cloudy_filename1, "r");
   file_in_use = cloudy_filename1;
   if (infile == NULL) {
-    infile = fopen(cloudy_filename2, "r");
+    infile      = fopen(cloudy_filename2, "r");
     file_in_use = cloudy_filename2;
   }
 
-  
   if (infile == NULL) {
-    chprintf("Unable to open Cloudy file with expected relative paths:\n %s \n OR \n %s\n", cloudy_filename1, cloudy_filename2);
+    chprintf(
+        "Unable to open Cloudy file with expected relative paths:\n %s \n OR "
+        "\n %s\n",
+        cloudy_filename1, cloudy_filename2);
     exit(1);
   } else {
     chprintf("Using Cloudy file at relative path: %s \n", file_in_use);
   }
-  
 
-  while (fgets(buffer, sizeof(buffer), infile) != NULL)
-  {
+  while (fgets(buffer, sizeof(buffer), infile) != NULL) {
     if (buffer[0] == '#') {
       continue;
-    }
-    else {
-      pch = strtok(buffer, "\t");
+    } else {
+      pch      = strtok(buffer, "\t");
       n_arr[i] = atof(pch);
-      while (pch != NULL)
-      {
+      while (pch != NULL) {
         pch = strtok(NULL, "\t");
-        if (pch != NULL)
-          T_arr[i] = atof(pch);
+        if (pch != NULL) T_arr[i] = atof(pch);
         pch = strtok(NULL, "\t");
-        if (pch != NULL)
-          L_arr[i] = atof(pch);
+        if (pch != NULL) L_arr[i] = atof(pch);
         pch = strtok(NULL, "\t");
-        if (pch != NULL)
-          H_arr[i] = atof(pch);
+        if (pch != NULL) H_arr[i] = atof(pch);
       }
       i++;
     }
@@ -93,8 +86,7 @@ void Host_Read_Cooling_Tables(float* cooling_table, float* heating_table)
   fclose(infile);
 
   // copy data from cooling array into the table
-  for (i=0; i<nx*ny; i++)
-  {
+  for (i = 0; i < nx * ny; i++) {
     cooling_table[i] = float(L_arr[i]);
     heating_table[i] = float(H_arr[i]);
   }
@@ -106,7 +98,6 @@ void Host_Read_Cooling_Tables(float* cooling_table, float* heating_table)
   free(H_arr);
 }
 
-
 /* \fn void Load_Cuda_Textures()
  * \brief Load the Cloudy cooling tables into texture memory on the GPU. */
 void Load_Cuda_Textures()
@@ -118,47 +109,53 @@ void Load_Cuda_Textures()
 
   // allocate host arrays to be copied to textures
   // these arrays are declared as external pointers in global.h
-  CudaSafeCall( cudaHostAlloc(&cooling_table, nx*ny*sizeof(float), cudaHostAllocDefault) );
-  CudaSafeCall( cudaHostAlloc(&heating_table, nx*ny*sizeof(float), cudaHostAllocDefault) );
+  GPU_Error_Check(cudaHostAlloc(&cooling_table, nx * ny * sizeof(float), cudaHostAllocDefault));
+  GPU_Error_Check(cudaHostAlloc(&heating_table, nx * ny * sizeof(float), cudaHostAllocDefault));
 
   // Read cooling tables into the host arrays
   Host_Read_Cooling_Tables(cooling_table, heating_table);
 
   // Allocate CUDA arrays in device memory
   cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
-  cudaMallocArray(&cuCoolArray, &channelDesc, nx, ny);
-  cudaMallocArray(&cuHeatArray, &channelDesc, nx, ny);
+  GPU_Error_Check(cudaMallocArray(&cuCoolArray, &channelDesc, nx, ny));
+  GPU_Error_Check(cudaMallocArray(&cuHeatArray, &channelDesc, nx, ny));
 
   // Copy the cooling and heating arrays from host to device
 
   // cudaMemcpyToArray is being deprecated
-  // cudaMemcpyToArray(cuCoolArray, 0, 0, cooling_table, nx*ny*sizeof(float), cudaMemcpyHostToDevice);
-  // cudaMemcpyToArray(cuHeatArray, 0, 0, heating_table, nx*ny*sizeof(float), cudaMemcpyHostToDevice);
+  // cudaMemcpyToArray(cuCoolArray, 0, 0, cooling_table, nx*ny*sizeof(float),
+  // cudaMemcpyHostToDevice); cudaMemcpyToArray(cuHeatArray, 0, 0,
+  // heating_table, nx*ny*sizeof(float), cudaMemcpyHostToDevice);
 
-  cudaMemcpy2DToArray(cuCoolArray, 0, 0, cooling_table, nx*sizeof(float) , nx*sizeof(float), ny, cudaMemcpyHostToDevice);
-  cudaMemcpy2DToArray(cuHeatArray, 0, 0, heating_table, nx*sizeof(float) , nx*sizeof(float), ny, cudaMemcpyHostToDevice);
+  cudaMemcpy2DToArray(cuCoolArray, 0, 0, cooling_table, nx * sizeof(float), nx * sizeof(float), ny,
+                      cudaMemcpyHostToDevice);
+  cudaMemcpy2DToArray(cuHeatArray, 0, 0, heating_table, nx * sizeof(float), nx * sizeof(float), ny,
+                      cudaMemcpyHostToDevice);
 
   // Specify textures
   struct cudaResourceDesc coolResDesc;
   memset(&coolResDesc, 0, sizeof(coolResDesc));
-  coolResDesc.resType = cudaResourceTypeArray;
+  coolResDesc.resType         = cudaResourceTypeArray;
   coolResDesc.res.array.array = cuCoolArray;
   struct cudaResourceDesc heatResDesc;
   memset(&heatResDesc, 0, sizeof(heatResDesc));
-  heatResDesc.resType = cudaResourceTypeArray;
+  heatResDesc.resType         = cudaResourceTypeArray;
   heatResDesc.res.array.array = cuHeatArray;
 
   // Specify texture object parameters (same for both tables)
   struct cudaTextureDesc texDesc;
   memset(&texDesc, 0, sizeof(texDesc));
-  texDesc.addressMode[0] = cudaAddressModeClamp; // out-of-bounds fetches return border values dimension 0
-  texDesc.addressMode[1] = cudaAddressModeClamp; // out-of-bounds fetches return border values dimension 1
+  texDesc.addressMode[0] = cudaAddressModeClamp;  // out-of-bounds fetches return border values
+                                                  // dimension 0
+  texDesc.addressMode[1] = cudaAddressModeClamp;  // out-of-bounds fetches return border values
+                                                  // dimension 1
   texDesc.filterMode = cudaFilterModePoint;
-  // We use point mode instead of Linear mode in order to do the interpolation ourselves.
-  // Linear mode introduces errors since it only uses 8 bits.
-  //cudaFilterModeLinear;
+  // We use point mode instead of Linear mode in order to do the interpolation
+  // ourselves. Linear mode introduces errors since it only uses 8 bits.
+  // cudaFilterModeLinear;
   texDesc.readMode = cudaReadModeElementType;
-  // Do not normalize coordinates, in order to simplify conversion from real values to texture coordinates
+  // Do not normalize coordinates, in order to simplify conversion from real
+  // values to texture coordinates
   texDesc.normalizedCoords = 0;
 
   // Create texture objects
@@ -166,13 +163,12 @@ void Load_Cuda_Textures()
   cudaCreateTextureObject(&heatTexObj, &heatResDesc, &texDesc, NULL);
 
   // Free the memory associated with the cooling tables on the host
-  CudaSafeCall( cudaFreeHost(cooling_table) );
-  CudaSafeCall( cudaFreeHost(heating_table) );
+  GPU_Error_Check(cudaFreeHost(cooling_table));
+  GPU_Error_Check(cudaFreeHost(heating_table));
 
   // Run Test
   // Test_Cloudy_Textures();
   // Test_Cloudy_Speed();
-
 }
 
 void Free_Cuda_Textures()
@@ -184,26 +180,24 @@ void Free_Cuda_Textures()
   // Free the device memory associated with the cuda arrays
   cudaFreeArray(cuCoolArray);
   cudaFreeArray(cuHeatArray);
-
 }
 
-
-
-
-/* Consider this function only to be used at the end of Load_Cuda_Textures when testing
- * Evaluate texture on grid of size num_n num_T for variables n,T */
-__global__ void Test_Cloudy_Textures_Kernel(int num_n, int num_T, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj)
+/* Consider this function only to be used at the end of Load_Cuda_Textures when
+ * testing Evaluate texture on grid of size num_n num_T for variables n,T */
+__global__ void Test_Cloudy_Textures_Kernel(int num_n, int num_T, cudaTextureObject_t coolTexObj,
+                                            cudaTextureObject_t heatTexObj)
 {
-  int id,id_n,id_T;
+  int id, id_n, id_T;
   id = threadIdx.x + blockIdx.x * blockDim.x;
   // Calculate log_T and log_n based on id
-  id_T = id/num_n;
-  id_n = id%num_n;
+  id_T = id / num_n;
+  id_n = id % num_n;
 
-  float grid_offset = 0.1/512.0;
-  // Min value, but include id=-1 as an outside value to check clamping. Use dx = 0.05 instead of 0.1 to check interpolation
-  float log_T = 1.0  + (id_T-1)*0.05 + grid_offset;
-  float log_n = -6.0 + (id_n-1)*0.05 + grid_offset;
+  float grid_offset = 0.1 / 512.0;
+  // Min value, but include id=-1 as an outside value to check clamping. Use dx
+  // = 0.05 instead of 0.1 to check interpolation
+  float log_T = 1.0 + (id_T - 1) * 0.05 + grid_offset;
+  float log_n = -6.0 + (id_n - 1) * 0.05 + grid_offset;
 
   // Remap for texture with normalized coords
   // float rlog_T = (log_T - 1.0) / 8.1;
@@ -214,27 +208,27 @@ __global__ void Test_Cloudy_Textures_Kernel(int num_n, int num_T, cudaTextureObj
   float rlog_n = (log_n + 6.0) * 10;
 
   // Evaluate
-  float lambda = Bilinear_Texture(coolTexObj, rlog_T, rlog_n); // tex2D<float>(coolTexObj, rlog_T, rlog_n);
-  float heat = Bilinear_Texture(heatTexObj, rlog_T, rlog_n); // tex2D<float>(heatTexObj, rlog_T, rlog_n);
+  float lambda = Bilinear_Texture(coolTexObj, rlog_T, rlog_n);  // tex2D<float>(coolTexObj, rlog_T, rlog_n);
+  float heat   = Bilinear_Texture(heatTexObj, rlog_T, rlog_n);  // tex2D<float>(heatTexObj, rlog_T, rlog_n);
 
   // Hackfully print it out for processing for correctness
-  printf("TEST_Cloudy: %.17e %.17e %.17e %.17e \n",log_T, log_n, lambda, heat);
-
+  printf("TEST_Cloudy: %.17e %.17e %.17e %.17e \n", log_T, log_n, lambda, heat);
 }
 
-
-/* Consider this function only to be used at the end of Load_Cuda_Textures when testing
- * Evaluate texture on grid of size num_n num_T for variables n,T */
-__global__ void Test_Cloudy_Speed_Kernel(int num_n, int num_T, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj)
+/* Consider this function only to be used at the end of Load_Cuda_Textures when
+ * testing Evaluate texture on grid of size num_n num_T for variables n,T */
+__global__ void Test_Cloudy_Speed_Kernel(int num_n, int num_T, cudaTextureObject_t coolTexObj,
+                                         cudaTextureObject_t heatTexObj)
 {
-  int id,id_n,id_T;
+  int id, id_n, id_T;
   id = threadIdx.x + blockIdx.x * blockDim.x;
   // Calculate log_T and log_n based on id
-  id_T = id/num_n;
-  id_n = id%num_n;
+  id_T = id / num_n;
+  id_n = id % num_n;
 
-  // Min value, but include id=-1 as an outside value to check clamping. Use dx = 0.05 instead of 0.1 to check interpolation
-  // float log_T = 1.0  + (id_T-1)*0.05;
+  // Min value, but include id=-1 as an outside value to check clamping. Use dx
+  // = 0.05 instead of 0.1 to check interpolation float log_T = 1.0  +
+  // (id_T-1)*0.05;
   //  float log_n = -6.0 + (id_n-1)*0.05;
 
   // Remap for texture with normalized coords
@@ -245,56 +239,48 @@ __global__ void Test_Cloudy_Speed_Kernel(int num_n, int num_T, cudaTextureObject
   // float rlog_T = (log_T - 1.0) * 10;
   // float rlog_n = (log_n + 6.0) * 10;
 
-  float rlog_T = (id_T - 1)*0.0125;
-  float rlog_n = (id_n - 1)*0.0125;
+  float rlog_T = (id_T - 1) * 0.0125;
+  float rlog_n = (id_n - 1) * 0.0125;
 
   // Evaluate
-  float lambda = Bilinear_Texture(coolTexObj, rlog_T, rlog_n); // tex2D<float>(coolTexObj, rlog_T, rlog_n);
-  float heat = Bilinear_Texture(heatTexObj, rlog_T, rlog_n); // tex2D<float>(heatTexObj, rlog_T, rlog_n);
+  float lambda = Bilinear_Texture(coolTexObj, rlog_T, rlog_n);  // tex2D<float>(coolTexObj, rlog_T, rlog_n);
+  float heat   = Bilinear_Texture(heatTexObj, rlog_T, rlog_n);  // tex2D<float>(heatTexObj, rlog_T, rlog_n);
 
   // Hackfully print it out for processing for correctness
-  // printf("TEST_Cloudy: %.17e %.17e %.17e %.17e \n",log_T, log_n, lambda, heat);
-
+  // printf("TEST_Cloudy: %.17e %.17e %.17e %.17e \n",log_T, log_n, lambda,
+  // heat);
 }
 
-/* Consider this function only to be used at the end of Load_Cuda_Textures when testing
- * Evaluate texture on grid of size num_n num_T for variables n,T */
+/* Consider this function only to be used at the end of Load_Cuda_Textures when
+ * testing Evaluate texture on grid of size num_n num_T for variables n,T */
 void Test_Cloudy_Textures()
 {
-  int num_n = 1+2*121;
-  int num_T = 1+2*81;
-  dim3 dim1dGrid((num_n*num_T+TPB-1)/TPB, 1, 1);
+  int num_n = 1 + 2 * 121;
+  int num_T = 1 + 2 * 81;
+  dim3 dim1dGrid((num_n * num_T + TPB - 1) / TPB, 1, 1);
   dim3 dim1dBlock(TPB, 1, 1);
-  hipLaunchKernelGGL(Test_Cloudy_Textures_Kernel,dim1dGrid,dim1dBlock,0,0,num_n,num_T,coolTexObj,heatTexObj);
-  CHECK(cudaDeviceSynchronize());
+  hipLaunchKernelGGL(Test_Cloudy_Textures_Kernel, dim1dGrid, dim1dBlock, 0, 0, num_n, num_T, coolTexObj, heatTexObj);
+  GPU_Error_Check(cudaDeviceSynchronize());
   printf("Exiting due to Test_Cloudy_Textures() being called \n");
   exit(0);
 }
 
 void Test_Cloudy_Speed()
 {
-  int num_n = 1+80*121;
-  int num_T = 1+80*81;
-  dim3 dim1dGrid((num_n*num_T+TPB-1)/TPB, 1, 1);
+  int num_n = 1 + 80 * 121;
+  int num_T = 1 + 80 * 81;
+  dim3 dim1dGrid((num_n * num_T + TPB - 1) / TPB, 1, 1);
   dim3 dim1dBlock(TPB, 1, 1);
-  CHECK(cudaDeviceSynchronize());
-  Real time_start = get_time();
-  for (int i=0; i<100; i++) {
-    hipLaunchKernelGGL(Test_Cloudy_Speed_Kernel,dim1dGrid,dim1dBlock,0,0,num_n,num_T,coolTexObj,heatTexObj);
+  GPU_Error_Check(cudaDeviceSynchronize());
+  Real time_start = Get_Time();
+  for (int i = 0; i < 100; i++) {
+    hipLaunchKernelGGL(Test_Cloudy_Speed_Kernel, dim1dGrid, dim1dBlock, 0, 0, num_n, num_T, coolTexObj, heatTexObj);
   }
-  CHECK(cudaDeviceSynchronize());
-  Real time_end = get_time();
+  GPU_Error_Check(cudaDeviceSynchronize());
+  Real time_end = Get_Time();
   printf(" Cloudy Test Time %9.4f micro-s \n", (time_end - time_start));
   printf("Exiting due to Test_Cloudy_Speed() being called \n");
   exit(0);
 }
 
-
-
-
-
-
-
-
-#endif
-#endif
+#endif  // CLOUDY_COOL
diff --git a/src/cooling/load_cloudy_texture.h b/src/cooling/load_cloudy_texture.h
index 164125392..7d6307f71 100644
--- a/src/cooling/load_cloudy_texture.h
+++ b/src/cooling/load_cloudy_texture.h
@@ -1,21 +1,19 @@
 /*! \file load_cloudy_texture.h
  *  \brief Wrapper file to load cloudy cooling table as CUDA texture. */
 
-#ifdef CUDA
 #ifdef CLOUDY_COOL
 
-#pragma once
+  #pragma once
 
-#include "../global/global.h"
+  #include "../global/global.h"
 
 /* \fn void Load_Cuda_Textures()
  * \brief Load the Cloudy cooling tables into texture memory on the GPU. */
 void Load_Cuda_Textures();
 
 /* \fn void Free_Cuda_Textures()
- * \brief Unbind the texture memory on the GPU, and free the associated Cuda arrays. */
+ * \brief Unbind the texture memory on the GPU, and free the associated Cuda
+ * arrays. */
 void Free_Cuda_Textures();
 
-#endif
-#endif
-
+#endif  // CLOUDY_COOL
diff --git a/src/cooling/texture_utilities.h b/src/cooling/texture_utilities.h
index 6b271d5a1..fc335bcf7 100644
--- a/src/cooling/texture_utilities.h
+++ b/src/cooling/texture_utilities.h
@@ -1,23 +1,22 @@
 /*! \file texture_utilities.h
  *  \brief Declarations of functions needed for textures. */
 
-// WARNING: do not include this header file in any .cpp file or any .h file that would be included into a .cpp file
-// because tex2D is undefined when compiling with gcc.
+// WARNING: do not include this header file in any .cpp file or any .h file that
+// would be included into a .cpp file because tex2D is undefined when compiling
+// with gcc.
 
-#ifdef CUDA
 #pragma once
 
-#include "../utils/gpu.hpp"
 #include <math.h>
+
 #include "../global/global.h"
+#include "../utils/gpu.hpp"
 
-inline __device__ float lerp(float v0, float v1, float f)
-{
-  return fma(f, v1, fma(-f,v0,v0));
-}
+inline __device__ float lerp(float v0, float v1, float f) { return fma(f, v1, fma(-f, v0, v0)); }
 
 /* \fn float Bilinear_Texture(cudaTextureObject_t tex, float x, float y)
-   \brief Access texture values from tex at coordinates (x,y) using bilinear interpolation
+   \brief Access texture values from tex at coordinates (x,y) using bilinear
+   interpolation
 */
 inline __device__ float Bilinear_Texture(cudaTextureObject_t tex, float x, float y)
 {
@@ -27,18 +26,16 @@ inline __device__ float Bilinear_Texture(cudaTextureObject_t tex, float x, float
   float fx = x - px;
   float fy = y - py;
 
-  // 0.5 offset is necessary to represent half-pixel offset built into texture coordinates
+  // 0.5 offset is necessary to represent half-pixel offset built into texture
+  // coordinates
   px += 0.5;
   py += 0.5;
 
-  float t00 = tex2D<float>(tex,px,py);
-  float t01 = tex2D<float>(tex,px,py+1);
-  float t10 = tex2D<float>(tex,px+1,py);
-  float t11 = tex2D<float>(tex,px+1,py+1);
+  float t00 = tex2D<float>(tex, px, py);
+  float t01 = tex2D<float>(tex, px, py + 1);
+  float t10 = tex2D<float>(tex, px + 1, py);
+  float t11 = tex2D<float>(tex, px + 1, py + 1);
   // The inner lerps interpolate along x
   // The outer lerp interpolates along y
   return lerp(lerp(t00, t10, fx), lerp(t01, t11, fx), fy);
-
 }
-
-#endif //CUDA
diff --git a/src/cooling_grackle/cool_grackle.cpp b/src/cooling_grackle/cool_grackle.cpp
index 4392feefd..a7f5c36cb 100644
--- a/src/cooling_grackle/cool_grackle.cpp
+++ b/src/cooling_grackle/cool_grackle.cpp
@@ -1,184 +1,180 @@
 #ifdef COOLING_GRACKLE
 
+  #include "../cooling_grackle/cool_grackle.h"
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include "../io/io.h"
-#include "../cooling_grackle/cool_grackle.h"
+  #include <math.h>
+  #include <stdio.h>
+  #include <stdlib.h>
 
+  #include "../grid/grid_enum.h"
+  #include "../io/io.h"
 
+Cool_GK::Cool_GK(void) {}
 
-Cool_GK::Cool_GK( void ){}
+void Grid3D::Initialize_Grackle(struct Parameters *P)
+{
+  chprintf("Initializing Grackle... \n");
 
-void Grid3D::Initialize_Grackle( struct parameters *P ){
-
-  chprintf( "Initializing Grackle... \n");
-
-  Cool.Initialize( P, Cosmo );
+  Cool.Initialize(P, Cosmo);
 
   Allocate_Memory_Grackle();
 
   Initialize_Fields_Grackle();
 
-  chprintf( "Grackle Initialized Successfully. \n\n");
-
-
+  chprintf("Grackle Initialized Successfully. \n\n");
 }
 
-
-void Cool_GK::Initialize( struct parameters *P, Cosmology &Cosmo ){
-
-  chprintf( " Using Grackle for chemistry and cooling \n" );
-  chprintf( " N scalar fields: %d \n", NSCALARS );
+void Cool_GK::Initialize(struct Parameters *P, Cosmology &Cosmo)
+{
+  chprintf(" Using Grackle for chemistry and cooling \n");
+  chprintf(" N scalar fields: %d \n", NSCALARS);
 
   grackle_verbose = 1;
   #ifdef MPI_CHOLLA
   // Enable output
-  if (procID != 0 ) grackle_verbose = 0;
+  if (procID != 0) grackle_verbose = 0;
   #endif
 
-
   tiny_number = 1.e-20;
-  gamma = P->gamma;
+  gamma       = P->gamma;
 
-  dens_conv = Cosmo.rho_0_gas;
-  energy_conv =   Cosmo.v_0_gas * Cosmo.v_0_gas ;
+  dens_conv   = Cosmo.rho_0_gas;
+  energy_conv = Cosmo.v_0_gas * Cosmo.v_0_gas;
 
   Real Msun = MSUN_CGS;
-  Real kpc = KPC_CGS;
-  Real km = KM_CGS
-
+  Real kpc  = KPC_CGS;
+  Real km   = KM_CGS
 
-  dens_to_CGS = dens_conv * Msun / kpc / kpc / kpc * Cosmo.cosmo_h * Cosmo.cosmo_h;
-  vel_to_CGS = km;
-  energy_to_CGS =  km * km;
+      dens_to_CGS = dens_conv * Msun / kpc / kpc / kpc * Cosmo.cosmo_h * Cosmo.cosmo_h;
+  vel_to_CGS      = km;
+  energy_to_CGS   = km * km;
 
   // First, set up the units system.
   // These are conversions from code units to cgs.
-  units.comoving_coordinates = 1; // 1 if cosmological sim, 0 if not
-  units.a_units = 1.0 ; // units for the expansion factor
-  units.a_value = Cosmo.current_a / units.a_units;
-  units.density_units = dens_to_CGS  / Cosmo.current_a / Cosmo.current_a / Cosmo.current_a ;
-  units.length_units = kpc / Cosmo.cosmo_h * Cosmo.current_a;
-  units.time_units = KPC / Cosmo.cosmo_h ;
-  units.velocity_units = units.length_units / Cosmo.current_a / units.time_units; // since u = a * dx/dt
-
-  // Second, create a chemistry object for parameters.  This needs to be a pointer.
+  units.comoving_coordinates = 1;    // 1 if cosmological sim, 0 if not
+  units.a_units              = 1.0;  // units for the expansion factor
+  units.a_value              = Cosmo.current_a / units.a_units;
+  units.density_units        = dens_to_CGS / Cosmo.current_a / Cosmo.current_a / Cosmo.current_a;
+  units.length_units         = kpc / Cosmo.cosmo_h * Cosmo.current_a;
+  units.time_units           = KPC / Cosmo.cosmo_h;
+  units.velocity_units       = units.length_units / Cosmo.current_a / units.time_units;  // since u = a * dx/dt
+
+  // Second, create a chemistry object for parameters.  This needs to be a
+  // pointer.
   data = new chemistry_data;
   if (set_default_chemistry_parameters(data) == 0) {
-    chprintf( "GRACKLE: Error in set_default_chemistry_parameters.\n");
-    exit(-1) ;
+    chprintf("GRACKLE: Error in set_default_chemistry_parameters.\n");
+    exit(-1);
   }
   // Set parameter values for chemistry.
   // Access the parameter storage with the struct you've created
   // or with the grackle_data pointer declared in grackle.h (see further below).
-  data->use_grackle = 1;            // chemistry on
-  data->with_radiative_cooling = 1; // Cooling on
-  data->primordial_chemistry = 1;   // molecular network with H, He
-  data->UVbackground = 1;           // UV background on
-  // data->grackle_data_file = "src/cooling/CloudyData_UVB=HM2012.h5"; // data file
-  // data->grackle_data_file = "src/cooling/CloudyData_UVB=HM2012_cloudy.h5"; // data file
-  // data->grackle_data_file = "src/cooling_grackle/CloudyData_UVB=Puchwein2018_cloudy.h5"; // data file
-  data->grackle_data_file = P->UVB_rates_file; // data file
-  // data->grackle_data_file = "src/cooling/CloudyData_UVB=FG2011.h5"; // data file
-  data->use_specific_heating_rate = 0;
+  data->use_grackle            = 1;  // chemistry on
+  data->with_radiative_cooling = 1;  // Cooling on
+  data->primordial_chemistry   = 1;  // molecular network with H, He
+  data->UVbackground           = 1;  // UV background on
+  // data->grackle_data_file = "src/cooling/CloudyData_UVB=HM2012.h5"; // data
+  // file data->grackle_data_file =
+  // "src/cooling/CloudyData_UVB=HM2012_cloudy.h5"; // data file
+  // data->grackle_data_file =
+  // "src/cooling_grackle/CloudyData_UVB=Puchwein2018_cloudy.h5"; // data file
+  data->grackle_data_file = P->UVB_rates_file;  // data file
+  // data->grackle_data_file = "src/cooling/CloudyData_UVB=FG2011.h5"; // data
+  // file
+  data->use_specific_heating_rate   = 0;
   data->use_volumetric_heating_rate = 0;
-  data->cmb_temperature_floor = 1;
+  data->cmb_temperature_floor       = 1;
 
   #ifdef GRACKLE_METALS
-  data->metal_cooling = 1;          // metal cooling off
+  data->metal_cooling = 1;  // metal cooling off
   #else
-  chprintf( "WARNING: Metal Cooling is Off. \n" );
-  data->metal_cooling = 0;          // metal cooling off
+  chprintf("WARNING: Metal Cooling is Off. \n");
+  data->metal_cooling = 0;  // metal cooling off
   #endif
 
   #ifdef PARALLEL_OMP
   data->omp_nthreads = N_OMP_THREADS_GRACKLE;
   #endif
 
-  if ( data->UVbackground == 1) chprintf( "GRACKLE: Loading UV Background File: %s\n", data->grackle_data_file );
+  if (data->UVbackground == 1) chprintf("GRACKLE: Loading UV Background File: %s\n", data->grackle_data_file);
 
   // Finally, initialize the chemistry object.
   if (initialize_chemistry_data(&units) == 0) {
-    chprintf( "GRACKLE: Error in initialize_chemistry_data.\n");
-    exit(-1) ;
+    chprintf("GRACKLE: Error in initialize_chemistry_data.\n");
+    exit(-1);
   }
 
-  if ( data->UVbackground == 1){
-    scale_factor_UVB_on = 1 / (data->UVbackground_redshift_on + 1 );
-    chprintf( "GRACKLE: UVB on: %f \n", scale_factor_UVB_on  );
+  if (data->UVbackground == 1) {
+    scale_factor_UVB_on = 1 / (data->UVbackground_redshift_on + 1);
+    chprintf("GRACKLE: UVB on: %f \n", scale_factor_UVB_on);
   }
-
 }
 
-void Grid3D::Allocate_Memory_Grackle( ){
-
-int n_cells = H.nx * H.ny * H.nz;
-int nx = Grav.nx_local;
-int ny = Grav.ny_local;
-int nz = Grav.nz_local;
-// Set grid dimension and size.
-Cool.field_size = n_cells;
-Cool.fields.grid_rank = 3;
-Cool.fields.grid_dimension = new int[3];
-Cool.fields.grid_start = new int[3];
-Cool.fields.grid_end = new int[3];
-Cool.fields.grid_dimension[0] = H.nx; // the active dimension
-Cool.fields.grid_dimension[1] = H.ny; // the active dimension
-Cool.fields.grid_dimension[2] = H.nz; // the active dimension
-// grid_start and grid_end are used to ignore ghost zones.
-Cool.fields.grid_start[0] = H.n_ghost;
-Cool.fields.grid_start[1] = H.n_ghost;
-Cool.fields.grid_start[2] = H.n_ghost;
-Cool.fields.grid_end[0] =  H.nx - H.n_ghost - 1 ;
-Cool.fields.grid_end[1] =  H.ny - H.n_ghost - 1 ;
-Cool.fields.grid_end[2] =  H.nz - H.n_ghost - 1 ;
-
-Cool.fields.grid_dx = 0.0; // used only for H2 self-shielding approximation
-
-Cool.fields.density         = C.density;
-Cool.fields.internal_energy = (Real *) malloc(Cool.field_size * sizeof(Real));
-// Cool.fields.x_velocity      = (Real *) malloc(Cool.field_size * sizeof(Real));
-// Cool.fields.y_velocity      = (Real *) malloc(Cool.field_size * sizeof(Real));
-// Cool.fields.z_velocity      = (Real *) malloc(Cool.field_size * sizeof(Real));
-Cool.fields.x_velocity      = NULL;
-Cool.fields.y_velocity      = NULL;
-Cool.fields.z_velocity      = NULL;
-
-
-chprintf( " Allocating memory for: HI, HII, HeI, HeII, HeIII, e   densities\n");
-Cool.fields.HI_density      = &C.scalar[ 0*n_cells ];
-Cool.fields.HII_density     = &C.scalar[ 1*n_cells ];
-Cool.fields.HeI_density     = &C.scalar[ 2*n_cells ];
-Cool.fields.HeII_density    = &C.scalar[ 3*n_cells ];
-Cool.fields.HeIII_density   = &C.scalar[ 4*n_cells ];
-Cool.fields.e_density       = &C.scalar[ 5*n_cells ];
-
-#ifdef GRACKLE_METALS
-chprintf( " Allocating memory for: metal density\n");
-Cool.fields.metal_density   = &C.scalar[ 6*n_cells ];
-#else
-Cool.fields.metal_density   = NULL;
-#endif
+void Grid3D::Allocate_Memory_Grackle()
+{
+  int n_cells = H.nx * H.ny * H.nz;
+  int nx      = Grav.nx_local;
+  int ny      = Grav.ny_local;
+  int nz      = Grav.nz_local;
+  // Set grid dimension and size.
+  Cool.field_size               = n_cells;
+  Cool.fields.grid_rank         = 3;
+  Cool.fields.grid_dimension    = new int[3];
+  Cool.fields.grid_start        = new int[3];
+  Cool.fields.grid_end          = new int[3];
+  Cool.fields.grid_dimension[0] = H.nx;  // the active dimension
+  Cool.fields.grid_dimension[1] = H.ny;  // the active dimension
+  Cool.fields.grid_dimension[2] = H.nz;  // the active dimension
+  // grid_start and grid_end are used to ignore ghost zones.
+  Cool.fields.grid_start[0] = H.n_ghost;
+  Cool.fields.grid_start[1] = H.n_ghost;
+  Cool.fields.grid_start[2] = H.n_ghost;
+  Cool.fields.grid_end[0]   = H.nx - H.n_ghost - 1;
+  Cool.fields.grid_end[1]   = H.ny - H.n_ghost - 1;
+  Cool.fields.grid_end[2]   = H.nz - H.n_ghost - 1;
+
+  Cool.fields.grid_dx = 0.0;  // used only for H2 self-shielding approximation
+
+  Cool.fields.density         = C.density;
+  Cool.fields.internal_energy = (Real *)malloc(Cool.field_size * sizeof(Real));
+  // Cool.fields.x_velocity      = (Real *) malloc(Cool.field_size *
+  // sizeof(Real)); Cool.fields.y_velocity      = (Real *)
+  // malloc(Cool.field_size * sizeof(Real)); Cool.fields.z_velocity      = (Real
+  // *) malloc(Cool.field_size * sizeof(Real));
+  Cool.fields.x_velocity = NULL;
+  Cool.fields.y_velocity = NULL;
+  Cool.fields.z_velocity = NULL;
+
+  chprintf(" Allocating memory for: HI, HII, HeI, HeII, HeIII, e   densities\n");
+  Cool.fields.HI_density    = &C.host[H.n_cells * grid_enum::HI_density];
+  Cool.fields.HII_density   = &C.host[H.n_cells * grid_enum::HII_density];
+  Cool.fields.HeI_density   = &C.host[H.n_cells * grid_enum::HeI_density];
+  Cool.fields.HeII_density  = &C.host[H.n_cells * grid_enum::HeII_density];
+  Cool.fields.HeIII_density = &C.host[H.n_cells * grid_enum::HeIII_density];
+  Cool.fields.e_density     = &C.host[H.n_cells * grid_enum::e_density];
 
-#ifdef OUTPUT_TEMPERATURE
-Cool.temperature = (Real *) malloc(Cool.field_size * sizeof(Real));
-#endif
-}
+  #ifdef GRACKLE_METALS
+  chprintf(" Allocating memory for: metal density\n");
+  Cool.fields.metal_density = &C.host[H.n_cells * grid_enum::metal_density];
+  #else
+  Cool.fields.metal_density = NULL;
+  #endif
 
+  #ifdef OUTPUT_TEMPERATURE
+  Cool.temperature = (Real *)malloc(Cool.field_size * sizeof(Real));
+  #endif
+}
 
-void Cool_GK::Free_Memory( ){
+void Cool_GK::Free_Memory()
+{
   // free( fields.x_velocity );
   // free( fields.y_velocity );
   // free( fields.z_velocity );
-  free( fields.internal_energy );
+  free(fields.internal_energy);
 
   #ifdef OUTPUT_TEMPERATURE
-  free( temperature );
+  free(temperature);
   #endif
-
 }
 
 #endif
-
diff --git a/src/cooling_grackle/cool_grackle.h b/src/cooling_grackle/cool_grackle.h
index c1fab3812..0014f7e75 100644
--- a/src/cooling_grackle/cool_grackle.h
+++ b/src/cooling_grackle/cool_grackle.h
@@ -1,18 +1,17 @@
 #ifdef COOLING_GRACKLE
 
-#ifndef INIT_GRACKLE_H
-#define INIT_GRACKLE_H
+  #ifndef INIT_GRACKLE_H
+    #define INIT_GRACKLE_H
 
-#include "../global/global.h"
+    #include "../global/global.h"
 
 extern "C" {
-#include <grackle.h>
+    #include <grackle.h>
 }
 
 class Cool_GK
 {
-  public:
-
+ public:
   code_units units;
   chemistry_data *data;
 
@@ -27,9 +26,9 @@ class Cool_GK
 
   Real temperature_units;
 
-  #ifdef OUTPUT_TEMPERATURE
+    #ifdef OUTPUT_TEMPERATURE
   Real *temperature;
-  #endif
+    #endif
 
   Real tiny_number;
 
@@ -39,17 +38,15 @@ class Cool_GK
   grackle_field_data fields;
   int field_size;
 
+  Cool_GK(void);
 
-Cool_GK( void );
-
-void Initialize( struct parameters *P, Cosmology &Cosmo );
+  void Initialize(struct Parameters *P, Cosmology &Cosmo);
 
-void Free_Memory();
-// void Do_Cooling_Step( Real dt );
-
-Real Get_Mean_Molecular_Weight( int cell_id );
+  void Free_Memory();
+  // void Do_Cooling_Step( Real dt );
 
+  Real Get_Mean_Molecular_Weight(int cell_id);
 };
 
-#endif
+  #endif
 #endif
diff --git a/src/cooling_grackle/grackle_functions.cpp b/src/cooling_grackle/grackle_functions.cpp
index 6e1b48ed6..d68281c3d 100644
--- a/src/cooling_grackle/grackle_functions.cpp
+++ b/src/cooling_grackle/grackle_functions.cpp
@@ -1,124 +1,120 @@
 #ifdef COOLING_GRACKLE
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include "../io/io.h"
-#include "../cooling_grackle/cool_grackle.h"
-
-#ifdef PARALLEL_OMP
-#include "../utils/parallel_omp.h"
-#endif
-
-
-
+  #include <math.h>
+  #include <stdio.h>
+  #include <stdlib.h>
 
+  #include "../cooling_grackle/cool_grackle.h"
+  #include "../io/io.h"
 
-void Grid3D::Initialize_Fields_Grackle(){
+  #ifdef PARALLEL_OMP
+    #include "../utils/parallel_omp.h"
+  #endif
 
+void Grid3D::Initialize_Fields_Grackle()
+{
   int nx_g, ny_g, nz_g, nx, ny, nz, nGHST;
-  nx_g = H.nx;
-  ny_g = H.ny;
-  nz_g = H.nz;
-  nx = H.nx_real;
-  ny = H.ny_real;
-  nz = H.nz_real;
+  nx_g  = H.nx;
+  ny_g  = H.ny;
+  nz_g  = H.nz;
+  nx    = H.nx_real;
+  ny    = H.ny_real;
+  nz    = H.nz_real;
   nGHST = H.n_ghost;
 
   Real d, vx, vy, vz, E, Ekin, GE, U;
   bool flag_DE;
   int i, j, k, i_g, j_g, k_g, id;
-  for (k=0; k<nz_g; k++) {
-    for (j=0; j<ny_g; j++) {
-      for (i=0; i<nx_g; i++) {
-        id = i + j*nx_g + k*nx_g*ny_g;
+  for (k = 0; k < nz_g; k++) {
+    for (j = 0; j < ny_g; j++) {
+      for (i = 0; i < nx_g; i++) {
+        id = i + j * nx_g + k * nx_g * ny_g;
         // Cool.fields.x_velocity[id] = 0.0;
         // Cool.fields.y_velocity[id] = 0.0;
         // Cool.fields.z_velocity[id] = 0.0;
 
-        Cool.fields.internal_energy[id] = C.GasEnergy[id]  / C.density[id] * Cool.energy_conv / Cosmo.current_a / Cosmo.current_a ;
-
+        Cool.fields.internal_energy[id] =
+            C.GasEnergy[id] / C.density[id] * Cool.energy_conv / Cosmo.current_a / Cosmo.current_a;
       }
     }
   }
 
   #ifdef OUTPUT_TEMPERATURE
-  if (calculate_temperature(&Cool.units, &Cool.fields,  Cool.temperature) == 0) {
-    chprintf( "GRACKLE: Error in calculate_temperature.\n");
-    return ;
+  if (calculate_temperature(&Cool.units, &Cool.fields, Cool.temperature) == 0) {
+    chprintf("GRACKLE: Error in calculate_temperature.\n");
+    return;
   }
-  Real temp_avrg = 0 ;
-  for (k=0; k<nz; k++) {
-    for (j=0; j<ny; j++) {
-      for (i=0; i<nx; i++) {
-        id = (i+nGHST) + (j+nGHST)*nx_g + (k+nGHST)*nx_g*ny_g;
+  Real temp_avrg = 0;
+  for (k = 0; k < nz; k++) {
+    for (j = 0; j < ny; j++) {
+      for (i = 0; i < nx; i++) {
+        id = (i + nGHST) + (j + nGHST) * nx_g + (k + nGHST) * nx_g * ny_g;
         temp_avrg += Cool.temperature[id];
       }
     }
   }
-  temp_avrg /= nz*ny*nx;
+  temp_avrg /= nz * ny * nx;
   chprintf("Average Temperature = %le K.\n", temp_avrg);
   #endif
-
-
 }
 
-
-void Grid3D::Copy_Fields_To_Grackle(){
+void Grid3D::Copy_Fields_To_Grackle()
+{
   #ifndef PARALLEL_OMP
-  Copy_Fields_To_Grackle_function( 0, H.nz_real );
+  Copy_Fields_To_Grackle_function(0, H.nz_real);
   #else
-  #pragma omp parallel num_threads( N_OMP_THREADS )
+    #pragma omp parallel num_threads(N_OMP_THREADS)
   {
     int omp_id, n_omp_procs;
     int g_start, g_end;
 
-    omp_id = omp_get_thread_num();
+    omp_id      = omp_get_thread_num();
     n_omp_procs = omp_get_num_threads();
-    Get_OMP_Grid_Indxs( H.nz_real, n_omp_procs, omp_id, &g_start, &g_end  );
+    Get_OMP_Grid_Indxs(H.nz_real, n_omp_procs, omp_id, &g_start, &g_end);
 
-    Copy_Fields_To_Grackle_function( g_start, g_end );
+    Copy_Fields_To_Grackle_function(g_start, g_end);
   }
   #endif
 }
 
-void Grid3D::Update_Internal_Energy(){
+void Grid3D::Update_Internal_Energy()
+{
   #ifndef PARALLEL_OMP
-  Update_Internal_Energy_function( 0, H.nz_real );
+  Update_Internal_Energy_function(0, H.nz_real);
   #else
-  #pragma omp parallel num_threads( N_OMP_THREADS )
+    #pragma omp parallel num_threads(N_OMP_THREADS)
   {
     int omp_id, n_omp_procs;
     int g_start, g_end;
 
-    omp_id = omp_get_thread_num();
+    omp_id      = omp_get_thread_num();
     n_omp_procs = omp_get_num_threads();
-    Get_OMP_Grid_Indxs( H.nz_real, n_omp_procs, omp_id, &g_start, &g_end  );
+    Get_OMP_Grid_Indxs(H.nz_real, n_omp_procs, omp_id, &g_start, &g_end);
 
-    Update_Internal_Energy_function( g_start, g_end );
+    Update_Internal_Energy_function(g_start, g_end);
   }
   #endif
 }
 
-void Grid3D::Copy_Fields_To_Grackle_function( int g_start, int g_end ){
-
+void Grid3D::Copy_Fields_To_Grackle_function(int g_start, int g_end)
+{
   int nx_g, ny_g, nz_g, nx, ny, nz, nGHST;
-  nx_g = H.nx;
-  ny_g = H.ny;
-  nz_g = H.nz;
-  nx = H.nx_real;
-  ny = H.ny_real;
-  nz = H.nz_real;
+  nx_g  = H.nx;
+  ny_g  = H.ny;
+  nz_g  = H.nz;
+  nx    = H.nx_real;
+  ny    = H.ny_real;
+  nz    = H.nz_real;
   nGHST = H.n_ghost;
 
   Real d, vx, vy, vz, E, Ekin, GE, U;
   int flag_DE;
   int k, j, i, id;
-  for (k=g_start; k<g_end; k++) {
-    for (j=0; j<ny; j++) {
-      for (i=0; i<nx; i++) {
-        id = (i+nGHST) + (j+nGHST)*nx_g + (k+nGHST)*nx_g*ny_g;
-        d = C.density[id];
+  for (k = g_start; k < g_end; k++) {
+    for (j = 0; j < ny; j++) {
+      for (i = 0; i < nx; i++) {
+        id = (i + nGHST) + (j + nGHST) * nx_g + (k + nGHST) * nx_g * ny_g;
+        d  = C.density[id];
         // vx = C.momentum_x[id] / d;
         // vy = C.momentum_y[id] / d;
         // vz = C.momentum_z[id] / d;
@@ -126,39 +122,39 @@ void Grid3D::Copy_Fields_To_Grackle_function( int g_start, int g_end ){
         // Ekin = 0.5 * d * ( vx*vx + vy*vy + vz*vz );
         GE = C.GasEnergy[id];
 
-        //The Flag for Dual Energy Is set on the Sync_Energies_3D step before cooling step
-        // flag_DE = Select_Internal_Energy_From_DE( E, E - Ekin, GE );
-        // Cool.flags_DE[id] = flag_DE;
+        // The Flag for Dual Energy Is set on the Sync_Energies_3D step before
+        // cooling step
+        //  flag_DE = Select_Internal_Energy_From_DE( E, E - Ekin, GE );
+        //  Cool.flags_DE[id] = flag_DE;
 
         // if ( flag_DE ) U = GE;
         // else U = E - Ekin;
-        U = GE;
-        Cool.fields.internal_energy[id] = U / d * Cool.energy_conv / Cosmo.current_a / Cosmo.current_a ;
+        U                               = GE;
+        Cool.fields.internal_energy[id] = U / d * Cool.energy_conv / Cosmo.current_a / Cosmo.current_a;
       }
     }
   }
 }
 
-void Grid3D::Update_Internal_Energy_function( int g_start, int g_end ){
-
-
+void Grid3D::Update_Internal_Energy_function(int g_start, int g_end)
+{
   int nx_g, ny_g, nz_g, nx, ny, nz, nGHST;
-  nx_g = H.nx;
-  ny_g = H.ny;
-  nz_g = H.nz;
-  nx = H.nx_real;
-  ny = H.ny_real;
-  nz = H.nz_real;
+  nx_g  = H.nx;
+  ny_g  = H.ny;
+  nz_g  = H.nz;
+  nx    = H.nx_real;
+  ny    = H.ny_real;
+  nz    = H.nz_real;
   nGHST = H.n_ghost;
   // Real ge_0, ge_1, delta_ge;
   // Real dens;
   Real dens, vx, vy, vz, E, Ekin, GE, U_0, U_1, delta_U;
   int flag_DE;
   int k, j, i, id;
-  for (k=g_start; k<g_end; k++) {
-    for (j=0; j<ny; j++) {
-      for (i=0; i<nx; i++) {
-        id = (i+nGHST) + (j+nGHST)*nx_g + (k+nGHST)*nx_g*ny_g;
+  for (k = g_start; k < g_end; k++) {
+    for (j = 0; j < ny; j++) {
+      for (i = 0; i < nx; i++) {
+        id   = (i + nGHST) + (j + nGHST) * nx_g + (k + nGHST) * nx_g * ny_g;
         dens = C.density[id];
         // vx = C.momentum_x[id] / dens;
         // vy = C.momentum_y[id] / dens;
@@ -171,73 +167,58 @@ void Grid3D::Update_Internal_Energy_function( int g_start, int g_end ){
         // // PRESSURE_DE
         // if ( flag_DE == 0 ) U_0 = E - Ekin;
         // else if ( flag_DE == 1 ) U_0 = GE;
-        // else std::cout << " ### Frag_DE ERROR: Flag_DE: " << flag_DE << std::endl;
-        U_0 = GE;
-        U_1 = Cool.fields.internal_energy[id] * dens / Cool.energy_conv  * Cosmo.current_a * Cosmo.current_a;
+        // else std::cout << " ### Frag_DE ERROR: Flag_DE: " << flag_DE <<
+        // std::endl;
+        U_0     = GE;
+        U_1     = Cool.fields.internal_energy[id] * dens / Cool.energy_conv * Cosmo.current_a * Cosmo.current_a;
         delta_U = U_1 - U_0;
-        C.GasEnergy[id] += delta_U ;
-        C.Energy[id] += delta_U ;
-
-
+        C.GasEnergy[id] += delta_U;
+        C.Energy[id] += delta_U;
       }
     }
   }
 }
 
-void Grid3D::Do_Cooling_Step_Grackle(){
-
+void Grid3D::Do_Cooling_Step_Grackle()
+{
   Real kpc_cgs = KPC_CGS;
   // Update the units conversion
-  Cool.units.a_value = Cosmo.current_a / Cool.units.a_units;
-  Cool.units.density_units = Cool.dens_to_CGS  / Cosmo.current_a / Cosmo.current_a / Cosmo.current_a ;
-  Cool.units.length_units = kpc_cgs / Cosmo.cosmo_h * Cosmo.current_a;
-
+  Cool.units.a_value       = Cosmo.current_a / Cool.units.a_units;
+  Cool.units.density_units = Cool.dens_to_CGS / Cosmo.current_a / Cosmo.current_a / Cosmo.current_a;
+  Cool.units.length_units  = kpc_cgs / Cosmo.cosmo_h * Cosmo.current_a;
 
   Copy_Fields_To_Grackle();
 
-
   Real dt_cool = Cosmo.dt_secs;
-  chprintf( " dt_cool: %e s\n", dt_cool );
-  if (solve_chemistry(&Cool.units, &Cool.fields, dt_cool / Cool.units.time_units ) == 0) {
-    chprintf( "GRACKLE: Error in solve_chemistry.\n");
-    return ;
+  chprintf(" dt_cool: %e s\n", dt_cool);
+  if (solve_chemistry(&Cool.units, &Cool.fields, dt_cool / Cool.units.time_units) == 0) {
+    chprintf("GRACKLE: Error in solve_chemistry.\n");
+    return;
   }
 
   #ifdef OUTPUT_TEMPERATURE
-  if (calculate_temperature(&Cool.units, &Cool.fields,  Cool.temperature) == 0) {
-    chprintf( "GRACKLE: Error in calculate_temperature.\n");
-    return ;
+  if (calculate_temperature(&Cool.units, &Cool.fields, Cool.temperature) == 0) {
+    chprintf("GRACKLE: Error in calculate_temperature.\n");
+    return;
   }
   #endif
 
   Update_Internal_Energy();
-
 }
 
-Real Cool_GK::Get_Mean_Molecular_Weight( int cell_id ){
-
+Real Cool_GK::Get_Mean_Molecular_Weight(int cell_id)
+{
   Real mu, dens, HI_dens, HII_dens, HeI_dens, HeII_dens, HeIII_dens;
 
-  dens = fields.density[cell_id];
-  HI_dens = fields.HI_density[cell_id];
-  HII_dens = fields.HII_density[cell_id];
-  HeI_dens = fields.HeI_density[cell_id];
-  HeII_dens = fields.HeII_density[cell_id];
+  dens       = fields.density[cell_id];
+  HI_dens    = fields.HI_density[cell_id];
+  HII_dens   = fields.HII_density[cell_id];
+  HeI_dens   = fields.HeI_density[cell_id];
+  HeII_dens  = fields.HeII_density[cell_id];
   HeIII_dens = fields.HeIII_density[cell_id];
 
-  mu = dens / ( HI_dens + 2*HII_dens + ( HeI_dens + 2*HeII_dens + 3*HeIII_dens) / 4 );
+  mu = dens / (HI_dens + 2 * HII_dens + (HeI_dens + 2 * HeII_dens + 3 * HeIII_dens) / 4);
   return mu;
-
 }
 
-
-
-
-
-
-
-
-
-
-
 #endif
diff --git a/src/cosmology/cosmology.cpp b/src/cosmology/cosmology.cpp
index a1a6e90a1..6575798e2 100644
--- a/src/cosmology/cosmology.cpp
+++ b/src/cosmology/cosmology.cpp
@@ -1,37 +1,34 @@
 #ifdef COSMOLOGY
 
-#include "../cosmology/cosmology.h"
-#include "../io/io.h"
+  #include "../cosmology/cosmology.h"
 
+  #include "../io/io.h"
 
+Cosmology::Cosmology(void) {}
 
-Cosmology::Cosmology( void ){}
+void Cosmology::Initialize(struct Parameters *P, Grav3D &Grav, Particles3D &Particles)
+{
+  chprintf("Cosmological Simulation\n");
 
-void Cosmology::Initialize( struct parameters *P, Grav3D &Grav, Particles_3D &Particles){
+  H0      = P->H0;
+  cosmo_h = H0 / 100;
+  H0 /= 1000;  //[km/s / kpc]
+  Omega_M = P->Omega_M;
+  Omega_L = P->Omega_L;
+  Omega_K = 1 - (Omega_M + Omega_L);
+  Omega_b = P->Omega_b;
 
-  chprintf( "Cosmological Simulation\n");
-
-  H0 = P-> H0;
-  cosmo_h = H0/100;
-  H0 /= 1000;               //[km/s / kpc]
-  Omega_M = P-> Omega_M;
-  Omega_L = P-> Omega_L;
-  Omega_K = 1 - ( Omega_M + Omega_L );
-  Omega_b = P-> Omega_b;
-
-  if(strcmp(P->init, "Read_Grid")==0){
+  if (strcmp(P->init, "Read_Grid") == 0) {
     // Read scale factor value from Particles
     current_z = Particles.current_z;
     current_a = Particles.current_a;
-  }
-  else{
-    current_z = P->Init_redshift;
-    current_a = 1. / ( current_z + 1 );
+  } else {
+    current_z           = P->Init_redshift;
+    current_a           = 1. / (current_z + 1);
     Particles.current_z = current_z;
     Particles.current_a = current_a;
   }
 
-
   // Set Scale factor in Gravity
   Grav.current_a = current_a;
 
@@ -42,53 +39,39 @@ void Cosmology::Initialize( struct parameters *P, Grav3D &Grav, Particles_3D &Pa
   Grav.Gconst = cosmo_G;
 
   max_delta_a = 0.001;
-  delta_a = max_delta_a;
+  delta_a     = max_delta_a;
 
   // Initialize Time and set the time conversion
-  t_secs = 0;
+  t_secs          = 0;
   time_conversion = KPC;
 
-
   // Set Normalization factors
-  r_0_dm   = P->xlen/P->nx;
-  t_0_dm   = 1. / H0;
-  v_0_dm   = r_0_dm / t_0_dm / cosmo_h;
-  rho_0_dm = 3*H0*H0 / ( 8*M_PI*cosmo_G ) * Omega_M /cosmo_h/cosmo_h;
-  rho_mean_baryon = 3*H0*H0 / ( 8*M_PI*cosmo_G ) * Omega_b /cosmo_h/cosmo_h;
+  r_0_dm          = P->xlen / P->nx;
+  t_0_dm          = 1. / H0;
+  v_0_dm          = r_0_dm / t_0_dm / cosmo_h;
+  rho_0_dm        = 3 * H0 * H0 / (8 * M_PI * cosmo_G) * Omega_M / cosmo_h / cosmo_h;
+  rho_mean_baryon = 3 * H0 * H0 / (8 * M_PI * cosmo_G) * Omega_b / cosmo_h / cosmo_h;
   // dens_avrg = 0;
 
-  r_0_gas = 1.0;
-  rho_0_gas = 3*H0*H0 / ( 8*M_PI*cosmo_G ) * Omega_M /cosmo_h/cosmo_h;
-  t_0_gas = 1/H0*cosmo_h;
-  v_0_gas = r_0_gas / t_0_gas;
+  r_0_gas   = 1.0;
+  rho_0_gas = 3 * H0 * H0 / (8 * M_PI * cosmo_G) * Omega_M / cosmo_h / cosmo_h;
+  t_0_gas   = 1 / H0 * cosmo_h;
+  v_0_gas   = r_0_gas / t_0_gas;
   phi_0_gas = v_0_gas * v_0_gas;
-  p_0_gas = rho_0_gas * v_0_gas * v_0_gas;
-  e_0_gas = v_0_gas * v_0_gas;
-
-  chprintf( " H0: %f\n", H0 * 1000 );
-  chprintf( " Omega_L: %f\n", Omega_L );
-  chprintf( " Omega_M: %f\n", Omega_M );
-  chprintf( " Omega_b: %f\n", Omega_b );
-  chprintf( " Current_a: %f\n", current_a );
-  chprintf( " Current_z: %f\n", current_z );
-  chprintf( " rho_0: %f\n", rho_0_gas );
-  chprintf( " v_0: %f \n", v_0_gas );
-  chprintf( " Max delta_a: %f \n", MAX_DELTA_A);
-
-  Set_Scale_Outputs( P );
-
+  p_0_gas   = rho_0_gas * v_0_gas * v_0_gas;
+  e_0_gas   = v_0_gas * v_0_gas;
+
+  chprintf(" H0: %f\n", H0 * 1000);
+  chprintf(" Omega_L: %f\n", Omega_L);
+  chprintf(" Omega_M: %f\n", Omega_M);
+  chprintf(" Omega_b: %f\n", Omega_b);
+  chprintf(" Current_a: %f\n", current_a);
+  chprintf(" Current_z: %f\n", current_z);
+  chprintf(" rho_0: %f\n", rho_0_gas);
+  chprintf(" v_0: %f \n", v_0_gas);
+  chprintf(" Max delta_a: %f \n", MAX_DELTA_A);
+
+  Set_Scale_Outputs(P);
 }
 
-
-
-
-
-
-
-
-
-
-
-
-
 #endif
diff --git a/src/cosmology/cosmology.h b/src/cosmology/cosmology.h
index b45e904b1..1e7c9bd1c 100644
--- a/src/cosmology/cosmology.h
+++ b/src/cosmology/cosmology.h
@@ -1,19 +1,19 @@
 #ifdef COSMOLOGY
 
-#ifndef COSMOLOGY_H
-#define COSMOLOGY_H
+  #ifndef COSMOLOGY_H
+    #define COSMOLOGY_H
 
-#include <stdio.h>
-#include <cmath>
-#include "../global/global.h"
-#include "../particles/particles_3D.h"
-#include "../gravity/grav3D.h"
+    #include <stdio.h>
 
+    #include <cmath>
+
+    #include "../global/global.h"
+    #include "../gravity/grav3D.h"
+    #include "../particles/particles_3D.h"
 
 class Cosmology
 {
-public:
-
+ public:
   Real H0;
   Real Omega_M;
   Real Omega_L;
@@ -54,21 +54,19 @@ class Cosmology
   Real next_output;
   bool exit_now;
 
+  Cosmology(void);
+  void Initialize(struct Parameters *P, Grav3D &Grav, Particles3D &Particles);
 
-  Cosmology( void );
-  void Initialize( struct parameters *P, Grav3D &Grav, Particles_3D &Particles );
-
-  void Load_Scale_Outputs( struct parameters *P );
-  void Set_Scale_Outputs( struct parameters *P );
+  void Load_Scale_Outputs(struct Parameters *P);
+  void Set_Scale_Outputs(struct Parameters *P);
 
-  void Set_Next_Scale_Output( );
+  void Set_Next_Scale_Output();
 
-  Real Get_Hubble_Parameter( Real a );
-
-  Real Get_da_from_dt( Real dt );
-  Real Get_dt_from_da( Real da );
+  Real Get_Hubble_Parameter(Real a);
 
+  Real Get_da_from_dt(Real dt);
+  Real Get_dt_from_da(Real da);
 };
 
-#endif
+  #endif
 #endif
diff --git a/src/cosmology/cosmology_functions.cpp b/src/cosmology/cosmology_functions.cpp
index c1ceb8299..f00c7e174 100644
--- a/src/cosmology/cosmology_functions.cpp
+++ b/src/cosmology/cosmology_functions.cpp
@@ -1,133 +1,133 @@
 #ifdef COSMOLOGY
 
+  #include "../global/global.h"
+  #include "../grid/grid3D.h"
+  #include "../grid/grid_enum.h"
+  #include "../io/io.h"
 
-#include "../grid/grid3D.h"
-#include "../global/global.h"
-#include "../io/io.h"
-
-
-
-void Grid3D::Initialize_Cosmology( struct parameters *P ){
-
-  chprintf( "Initializing Cosmology... \n");
-  Cosmo.Initialize( P, Grav, Particles );
+void Grid3D::Initialize_Cosmology(struct Parameters *P)
+{
+  chprintf("Initializing Cosmology... \n");
+  Cosmo.Initialize(P, Grav, Particles);
 
   // Change to comoving Cosmological System
-  Change_Cosmological_Frame_Sytem( true );
+  Change_Cosmological_Frame_Sytem(true);
 
-  if ( fabs( Cosmo.current_a - Cosmo.next_output ) < 1e-5 ) H.Output_Now = true;
-
-  chprintf( "Cosmology Successfully Initialized. \n\n");
+  if (fabs(Cosmo.current_a - Cosmo.next_output) < 1e-5) {
+    H.Output_Now = true;
+  }
 
+  chprintf("Cosmology Successfully Initialized. \n\n");
 }
 
-Real Cosmology::Get_da_from_dt( Real dt ){
-  Real a2 = current_a * current_a;
-  Real a_dot = sqrt( Omega_M/current_a + a2*Omega_L + Omega_K ) * H0 ;
+Real Cosmology::Get_da_from_dt(Real dt)
+{
+  Real a2    = current_a * current_a;
+  Real a_dot = sqrt(Omega_M / current_a + a2 * Omega_L + Omega_K) * H0;
   return a_dot * dt;
 }
 
-Real Cosmology::Get_dt_from_da( Real da ){
-  Real a2 = current_a * current_a;
-  Real a_dot = sqrt( Omega_M/current_a + a2*Omega_L + Omega_K ) * H0 ;
+Real Cosmology::Get_dt_from_da(Real da)
+{
+  Real a2    = current_a * current_a;
+  Real a_dot = sqrt(Omega_M / current_a + a2 * Omega_L + Omega_K) * H0;
   return da / a_dot;
 }
 
-Real Cosmology::Get_Hubble_Parameter( Real a ){
-  Real a2 = a * a;
-  Real a3 = a2 * a;
-  Real factor = ( Omega_M/a3 + Omega_K/a2 + Omega_L );
+Real Cosmology::Get_Hubble_Parameter(Real a)
+{
+  Real a2     = a * a;
+  Real a3     = a2 * a;
+  Real factor = (Omega_M / a3 + Omega_K / a2 + Omega_L);
   return H0 * sqrt(factor);
 }
 
-void Grid3D::Change_Cosmological_Frame_Sytem( bool forward ){
-
-  if (forward) chprintf( " Converting to Cosmological Comoving System\n");
-  else chprintf( " Converting to Cosmological Physical System\n");
+void Grid3D::Change_Cosmological_Frame_Sytem(bool forward)
+{
+  if (forward) {
+    chprintf(" Converting to Cosmological Comoving System\n");
+  } else {
+    chprintf(" Converting to Cosmological Physical System\n");
+  }
 
-  Change_DM_Frame_System( forward );
+  Change_DM_Frame_System(forward);
   #ifndef ONLY_PARTICLES
 
-  Change_GAS_Frame_System_GPU( forward );
+  Change_GAS_Frame_System_GPU(forward);
 
-  Change_GAS_Frame_System( forward );
-  #endif//ONLY_PARTICLES
+  Change_GAS_Frame_System(forward);
+  #endif  // ONLY_PARTICLES
 }
-void Grid3D::Change_DM_Frame_System( bool forward ){
-
+void Grid3D::Change_DM_Frame_System(bool forward)
+{
   #ifdef PARTICLES_CPU
 
   part_int_t pIndx;
   Real vel_factor;
   vel_factor = 1;
 
-
-  for ( pIndx=0; pIndx<Particles.n_local; pIndx++ ){
+  for (pIndx = 0; pIndx < Particles.n_local; pIndx++) {
     Particles.vel_x[pIndx] *= vel_factor;
     Particles.vel_y[pIndx] *= vel_factor;
     Particles.vel_z[pIndx] *= vel_factor;
   }
 
-  #endif //PARTICLES_CPU
+  #endif  // PARTICLES_CPU
 
-  // NOTE:Not implemented for PARTICLES_GPU, doesn't matter as long as vel_factor=1
+  // NOTE:Not implemented for PARTICLES_GPU, doesn't matter as long as
+  // vel_factor=1
 }
 
-void Grid3D::Change_GAS_Frame_System( bool forward ){
-
+void Grid3D::Change_GAS_Frame_System(bool forward)
+{
   Real dens_factor, momentum_factor, energy_factor;
-  if ( forward ){
-    dens_factor = 1 / Cosmo.rho_0_gas;
+  if (forward) {
+    dens_factor     = 1 / Cosmo.rho_0_gas;
     momentum_factor = 1 / Cosmo.rho_0_gas / Cosmo.v_0_gas * Cosmo.current_a;
-    energy_factor = 1 / Cosmo.rho_0_gas / Cosmo.v_0_gas / Cosmo.v_0_gas * Cosmo.current_a * Cosmo.current_a;
-  }
-  else{
-    dens_factor = Cosmo.rho_0_gas;
-    momentum_factor =  Cosmo.rho_0_gas * Cosmo.v_0_gas / Cosmo.current_a;
-    energy_factor =  Cosmo.rho_0_gas * Cosmo.v_0_gas * Cosmo.v_0_gas / Cosmo.current_a / Cosmo.current_a;
+    energy_factor   = 1 / Cosmo.rho_0_gas / Cosmo.v_0_gas / Cosmo.v_0_gas * Cosmo.current_a * Cosmo.current_a;
+  } else {
+    dens_factor     = Cosmo.rho_0_gas;
+    momentum_factor = Cosmo.rho_0_gas * Cosmo.v_0_gas / Cosmo.current_a;
+    energy_factor   = Cosmo.rho_0_gas * Cosmo.v_0_gas * Cosmo.v_0_gas / Cosmo.current_a / Cosmo.current_a;
   }
   int k, j, i, id;
-  for (k=0; k<H.nz; k++) {
-    for (j=0; j<H.ny; j++) {
-      for (i=0; i<H.nx; i++) {
-        id = i + j*H.nx + k*H.nx*H.ny;
-        C.density[id] = C.density[id] * dens_factor ;
-        C.momentum_x[id] = C.momentum_x[id] *  momentum_factor ;
-        C.momentum_y[id] = C.momentum_y[id] *  momentum_factor ;
-        C.momentum_z[id] = C.momentum_z[id] *  momentum_factor ;
-        C.Energy[id] = C.Energy[id] * energy_factor ;
-
-        #ifdef DE
-        C.GasEnergy[id] = C.GasEnergy[id]  * energy_factor ;
-        #endif
-
-        #ifdef COOLING_GRACKLE
-        C.scalar[0*H.n_cells + id] *= dens_factor;
-        C.scalar[1*H.n_cells + id] *= dens_factor;
-        C.scalar[2*H.n_cells + id] *= dens_factor;
-        C.scalar[3*H.n_cells + id] *= dens_factor;
-        C.scalar[4*H.n_cells + id] *= dens_factor;
-        C.scalar[5*H.n_cells + id] *= dens_factor;
-        #ifdef GRACKLE_METALS
-        C.scalar[6*H.n_cells + id] *= dens_factor;
-        #endif
-        #endif//COOLING_GRACKLE
-        
-        #ifdef CHEMISTRY_GPU
-        C.scalar[0*H.n_cells + id] *= dens_factor;
-        C.scalar[1*H.n_cells + id] *= dens_factor;
-        C.scalar[2*H.n_cells + id] *= dens_factor;
-        C.scalar[3*H.n_cells + id] *= dens_factor;
-        C.scalar[4*H.n_cells + id] *= dens_factor;
-        C.scalar[5*H.n_cells + id] *= dens_factor;
-        #endif
-
+  for (k = 0; k < H.nz; k++) {
+    for (j = 0; j < H.ny; j++) {
+      for (i = 0; i < H.nx; i++) {
+        id               = i + j * H.nx + k * H.nx * H.ny;
+        C.density[id]    = C.density[id] * dens_factor;
+        C.momentum_x[id] = C.momentum_x[id] * momentum_factor;
+        C.momentum_y[id] = C.momentum_y[id] * momentum_factor;
+        C.momentum_z[id] = C.momentum_z[id] * momentum_factor;
+        C.Energy[id]     = C.Energy[id] * energy_factor;
+
+  #ifdef DE
+        C.GasEnergy[id] = C.GasEnergy[id] * energy_factor;
+  #endif
+
+  #ifdef COOLING_GRACKLE
+        C.HI_density[id] *= dens_factor;
+        C.HII_density[id] *= dens_factor;
+        C.HeI_density[id] *= dens_factor;
+        C.HeII_density[id] *= dens_factor;
+        C.HeIII_density[id] *= dens_factor;
+        C.e_density[id] *= dens_factor;
+    #ifdef GRACKLE_METALS
+        C.metal_density[id] *= dens_factor;
+    #endif
+  #endif  // COOLING_GRACKLE
+
+  #ifdef CHEMISTRY_GPU
+        C.HI_density[id] *= dens_factor;
+        C.HII_density[id] *= dens_factor;
+        C.HeI_density[id] *= dens_factor;
+        C.HeII_density[id] *= dens_factor;
+        C.HeIII_density[id] *= dens_factor;
+        C.e_density[id] *= dens_factor;
+  #endif
       }
     }
   }
 }
 
-
-
-
 #endif
diff --git a/src/cosmology/cosmology_functions_gpu.cu b/src/cosmology/cosmology_functions_gpu.cu
index 8b1d80efa..aac1335c5 100644
--- a/src/cosmology/cosmology_functions_gpu.cu
+++ b/src/cosmology/cosmology_functions_gpu.cu
@@ -1,10 +1,9 @@
 #if defined(COSMOLOGY)
 
+  #include "../cosmology/cosmology_functions_gpu.h"
 
-#include "../cosmology/cosmology_functions_gpu.h"
-
-
-// __device__ Real Get_Hubble_Parameter_dev( Real a, Real H0, Real Omega_M, Real Omega_L, Real Omega_K ){
+// __device__ Real Get_Hubble_Parameter_dev( Real a, Real H0, Real Omega_M, Real
+// Omega_L, Real Omega_K ){
 //   Real a2 = a * a;
 //   Real a3 = a2 * a;
 //   Real factor = ( Omega_M/a3 + Omega_K/a2 + Omega_L );
@@ -12,46 +11,45 @@
 //
 // }
 
-
-void __global__ Change_GAS_Frame_System_kernel( Real dens_factor, Real momentum_factor, Real energy_factor,
-          int nx, int ny, int nz, Real *density_d, Real *momentum_x_d, Real *momentum_y_d, Real *momentum_z_d,
-          Real *Energy_d, Real *GasEnergy_d ){
-
+void __global__ Change_GAS_Frame_System_kernel(Real dens_factor, Real momentum_factor, Real energy_factor, int nx,
+                                               int ny, int nz, Real *density_d, Real *momentum_x_d, Real *momentum_y_d,
+                                               Real *momentum_z_d, Real *Energy_d, Real *GasEnergy_d)
+{
   int tid_x, tid_y, tid_z, tid_grid;
   tid_x = blockIdx.x * blockDim.x + threadIdx.x;
   tid_y = blockIdx.y * blockDim.y + threadIdx.y;
   tid_z = blockIdx.z * blockDim.z + threadIdx.z;
 
-  if (tid_x >= nx || tid_y >= ny || tid_z >= nz ) return;
+  if (tid_x >= nx || tid_y >= ny || tid_z >= nz) {
+    return;
+  }
 
-  tid_grid = tid_x + tid_y*nx + tid_z*nx*ny;
+  tid_grid = tid_x + tid_y * nx + tid_z * nx * ny;
 
-  density_d[tid_grid]    = density_d[tid_grid]    * dens_factor;
+  density_d[tid_grid]    = density_d[tid_grid] * dens_factor;
   momentum_x_d[tid_grid] = momentum_x_d[tid_grid] * momentum_factor;
   momentum_y_d[tid_grid] = momentum_y_d[tid_grid] * momentum_factor;
   momentum_z_d[tid_grid] = momentum_z_d[tid_grid] * momentum_factor;
-  Energy_d[tid_grid]     = Energy_d[tid_grid]     * energy_factor;
+  Energy_d[tid_grid]     = Energy_d[tid_grid] * energy_factor;
   #ifdef DE
-  GasEnergy_d[tid_grid]  = GasEnergy_d[tid_grid]  * energy_factor;
+  GasEnergy_d[tid_grid] = GasEnergy_d[tid_grid] * energy_factor;
   #endif
 
-  //NOTE If CHEMISTRY_GPU I need to add the conversion for the chemical species here
-
+  // NOTE If CHEMISTRY_GPU I need to add the conversion for the chemical species
+  // here
 }
 
-
-void Grid3D::Change_GAS_Frame_System_GPU( bool forward ){
-
+void Grid3D::Change_GAS_Frame_System_GPU(bool forward)
+{
   Real dens_factor, momentum_factor, energy_factor;
-  if ( forward ){
-    dens_factor = 1 / Cosmo.rho_0_gas;
+  if (forward) {
+    dens_factor     = 1 / Cosmo.rho_0_gas;
     momentum_factor = 1 / Cosmo.rho_0_gas / Cosmo.v_0_gas * Cosmo.current_a;
-    energy_factor = 1 / Cosmo.rho_0_gas / Cosmo.v_0_gas / Cosmo.v_0_gas * Cosmo.current_a * Cosmo.current_a;
-  }
-  else{
-    dens_factor = Cosmo.rho_0_gas;
-    momentum_factor =  Cosmo.rho_0_gas * Cosmo.v_0_gas / Cosmo.current_a;
-    energy_factor =  Cosmo.rho_0_gas * Cosmo.v_0_gas * Cosmo.v_0_gas / Cosmo.current_a / Cosmo.current_a;
+    energy_factor   = 1 / Cosmo.rho_0_gas / Cosmo.v_0_gas / Cosmo.v_0_gas * Cosmo.current_a * Cosmo.current_a;
+  } else {
+    dens_factor     = Cosmo.rho_0_gas;
+    momentum_factor = Cosmo.rho_0_gas * Cosmo.v_0_gas / Cosmo.current_a;
+    energy_factor   = Cosmo.rho_0_gas * Cosmo.v_0_gas * Cosmo.v_0_gas / Cosmo.current_a / Cosmo.current_a;
   }
 
   int nx, ny, nz;
@@ -60,9 +58,9 @@ void Grid3D::Change_GAS_Frame_System_GPU( bool forward ){
   nz = H.nz;
 
   // set values for GPU kernels
-  int tpb_x = TPBX_COSMO;
-  int tpb_y = TPBY_COSMO;
-  int tpb_z = TPBZ_COSMO;
+  int tpb_x   = TPBX_COSMO;
+  int tpb_y   = TPBY_COSMO;
+  int tpb_z   = TPBZ_COSMO;
   int ngrid_x = (nx - 1) / tpb_x + 1;
   int ngrid_y = (ny - 1) / tpb_y + 1;
   int ngrid_z = (nz - 1) / tpb_z + 1;
@@ -78,12 +76,9 @@ void Grid3D::Change_GAS_Frame_System_GPU( bool forward ){
   GasEnergy_d = NULL;
   #endif
 
-  hipLaunchKernelGGL(Change_GAS_Frame_System_kernel, dim3dGrid, dim3dBlock, 0, 0, dens_factor, momentum_factor, energy_factor, nx, ny, nz,
-                 C.d_density, C.d_momentum_x, C.d_momentum_y, C.d_momentum_z, C.d_Energy, GasEnergy_d   );
-
+  hipLaunchKernelGGL(Change_GAS_Frame_System_kernel, dim3dGrid, dim3dBlock, 0, 0, dens_factor, momentum_factor,
+                     energy_factor, nx, ny, nz, C.d_density, C.d_momentum_x, C.d_momentum_y, C.d_momentum_z, C.d_Energy,
+                     GasEnergy_d);
 }
 
-
-
-
-#endif //COSMOLOGY
+#endif  // COSMOLOGY
diff --git a/src/cosmology/cosmology_functions_gpu.h b/src/cosmology/cosmology_functions_gpu.h
index ced300114..092e13bdf 100644
--- a/src/cosmology/cosmology_functions_gpu.h
+++ b/src/cosmology/cosmology_functions_gpu.h
@@ -1,18 +1,15 @@
-#if defined(COSMOLOGY) 
+#if defined(COSMOLOGY)
 
+  #include "../global/global.h"
+  #include "../grid/grid3D.h"
+  #include "../io/io.h"
+  #include "../utils/gpu.hpp"
 
-#include "../grid/grid3D.h"
-#include "../global/global.h"
-#include "../io/io.h"
-#include "../utils/gpu.hpp"
+  #define TPBX_COSMO 16
+  #define TPBY_COSMO 8
+  #define TPBZ_COSMO 8
 
-#define TPBX_COSMO 16
-#define TPBY_COSMO 8
-#define TPBZ_COSMO 8
+// __device__ Real Get_Hubble_Parameter_dev( Real a, Real H0, Real Omega_M, Real
+// Omega_L, Real Omega_K );
 
-// __device__ Real Get_Hubble_Parameter_dev( Real a, Real H0, Real Omega_M, Real Omega_L, Real Omega_K );
-
-
-
-
-#endif //COSMOLOGY
+#endif  // COSMOLOGY
diff --git a/src/cosmology/io_cosmology.cpp b/src/cosmology/io_cosmology.cpp
index c4f9aa029..7492a814c 100644
--- a/src/cosmology/io_cosmology.cpp
+++ b/src/cosmology/io_cosmology.cpp
@@ -1,36 +1,33 @@
 #ifdef COSMOLOGY
 
-#include <iostream>
-#include <fstream>
-#include "../cosmology/cosmology.h"
-#include "../io/io.h"
+  #include <fstream>
+  #include <iostream>
 
-using namespace std;
-
-
-void Cosmology::Load_Scale_Outputs( struct parameters *P ) {
+  #include "../cosmology/cosmology.h"
+  #include "../io/io.h"
 
+void Cosmology::Load_Scale_Outputs(struct Parameters *P)
+{
   char filename_1[100];
   // create the filename to read from
   strcpy(filename_1, P->scale_outputs_file);
-  chprintf( " Loading Scale_Factor Outpus: %s\n", filename_1);
+  chprintf(" Loading Scale_Factor Outpus: %s\n", filename_1);
 
-  ifstream file_out ( filename_1 );
-  string line;
+  std::ifstream file_out(filename_1);
+  std::string line;
   Real a_value;
-  if (file_out.is_open()){
-    while ( getline (file_out,line) ){
-      a_value = atof( line.c_str() );
-      scale_outputs.push_back( a_value );
+  if (file_out.is_open()) {
+    while (getline(file_out, line)) {
+      a_value = atof(line.c_str());
+      scale_outputs.push_back(a_value);
       n_outputs += 1;
       // chprintf("%f\n", a_value);
     }
     file_out.close();
-    n_outputs = scale_outputs.size();
+    n_outputs        = scale_outputs.size();
     next_output_indx = 0;
     chprintf("  Loaded %d scale outputs \n", n_outputs);
-  }
-  else{
+  } else {
     chprintf("  Error: Unable to open cosmology outputs file\n");
     exit(1);
   }
@@ -38,62 +35,56 @@ void Cosmology::Load_Scale_Outputs( struct parameters *P ) {
   chprintf(" Setting next snapshot output\n");
 
   int scale_indx = next_output_indx;
-  a_value = scale_outputs[scale_indx];
+  a_value        = scale_outputs[scale_indx];
 
-  while ( (current_a - a_value) > 1e-3  ){
+  while ((current_a - a_value) > 1e-3) {
     // chprintf( "%f   %f\n", a_value, current_a);
     scale_indx += 1;
     a_value = scale_outputs[scale_indx];
   }
   next_output_indx = scale_indx;
-  next_output = a_value;
-  chprintf("  Next output index: %d  \n", next_output_indx );
-  chprintf("  Next output z value: %f  \n", 1./next_output - 1 );
+  next_output      = a_value;
+  chprintf("  Next output index: %d  \n", next_output_indx);
+  chprintf("  Next output z value: %f  \n", 1. / next_output - 1);
 
   exit_now = false;
-
 }
 
-void Cosmology::Set_Scale_Outputs( struct parameters *P ){
-
-  if ( P->scale_outputs_file[0] == '\0' ){
-    chprintf( " Output every %d timesteps.\n", P->n_steps_output );
-    Real scale_end = 1 / ( P->End_redshift + 1);
-    scale_outputs.push_back( current_a );
-    scale_outputs.push_back( scale_end );
-    n_outputs = scale_outputs.size();
+void Cosmology::Set_Scale_Outputs(struct Parameters *P)
+{
+  if (P->scale_outputs_file[0] == '\0') {
+    chprintf(" Output every %d timesteps.\n", P->n_steps_output);
+    Real scale_end = 1 / (P->End_redshift + 1);
+    scale_outputs.push_back(current_a);
+    scale_outputs.push_back(scale_end);
+    n_outputs        = scale_outputs.size();
     next_output_indx = 0;
-    next_output = current_a;
-    chprintf("  Next output index: %d  \n", next_output_indx );
-    chprintf("  Next output z value: %f  \n", 1./next_output - 1 );
+    next_output      = current_a;
+    chprintf("  Next output index: %d  \n", next_output_indx);
+    chprintf("  Next output z value: %f  \n", 1. / next_output - 1);
+  } else {
+    Load_Scale_Outputs(P);
   }
-  else  Load_Scale_Outputs( P );
-
-
-
 }
 
-
-void Cosmology::Set_Next_Scale_Output(  ){
-
-
+void Cosmology::Set_Next_Scale_Output()
+{
   int scale_indx = next_output_indx;
-  Real a_value = scale_outputs[scale_indx];
-  // chprintf("Setting next output index. Current index: %d    n_outputs: %d ", scale_indx, n_outputs);
+  Real a_value   = scale_outputs[scale_indx];
+  // chprintf("Setting next output index. Current index: %d    n_outputs: %d ",
+  // scale_indx, n_outputs);
 
-  // if  ( ( scale_indx == 0 ) && ( abs(a_value - current_a )<1e-5 ) )scale_indx = 1;
+  // if  ( ( scale_indx == 0 ) && ( abs(a_value - current_a )<1e-5 ) )scale_indx
+  // = 1;
   scale_indx += 1;
 
-  if ( scale_indx < n_outputs ){
-    a_value = scale_outputs[scale_indx];
+  if (scale_indx < n_outputs) {
+    a_value          = scale_outputs[scale_indx];
     next_output_indx = scale_indx;
-    next_output = a_value;
-  }
-  else{
+    next_output      = a_value;
+  } else {
     exit_now = true;
   }
-
 }
 
-
 #endif
diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu
new file mode 100644
index 000000000..8b72facdf
--- /dev/null
+++ b/src/dust/dust_cuda.cu
@@ -0,0 +1,136 @@
+/*!
+ * \file dust_cuda.cu
+ * \author Helena Richie (helenarichie@gmail.com)
+ * \brief Contains code that updates the dust density scalar field. The dust_kernel function determines the rate of
+ * change of dust density, which is controlled by the sputtering timescale. The sputtering timescale is from the
+ * McKinnon et al. (2017) model of dust sputtering, which depends on the cell's gas density and temperature.
+ */
+
+#ifdef DUST
+
+  // STL includes
+  #include <stdio.h>
+
+  #include <cstdio>
+  #include <fstream>
+  #include <vector>
+
+  // Local includes
+  #include "../dust/dust_cuda.h"
+  #include "../global/global.h"
+  #include "../global/global_cuda.h"
+  #include "../grid/grid3D.h"
+  #include "../grid/grid_enum.h"
+  #include "../utils/cuda_utilities.h"
+  #include "../utils/gpu.hpp"
+  #include "../utils/hydro_utilities.h"
+
+void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma,
+                 Real grain_radius)
+{
+  int n_cells = nx * ny * nz;
+  int ngrid   = (n_cells + TPB - 1) / TPB;
+  dim3 dim1dGrid(ngrid, 1, 1);
+  dim3 dim1dBlock(TPB, 1, 1);
+  hipLaunchKernelGGL(Dust_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, gamma,
+                     grain_radius);
+  GPU_Error_Check();
+}
+
+__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma,
+                            Real grain_radius)
+{
+  // get grid indices
+  int n_cells = nx * ny * nz;
+  int is, ie, js, je, ks, ke;
+  cuda_utilities::Get_Real_Indices(n_ghost, nx, ny, nz, is, ie, js, je, ks, ke);
+  // get a global thread ID
+  int blockId = blockIdx.x + blockIdx.y * gridDim.x;
+  int id      = threadIdx.x + blockId * blockDim.x;
+  int id_z    = id / (nx * ny);
+  int id_y    = (id - id_z * nx * ny) / nx;
+  int id_x    = id - id_z * nx * ny - id_y * nx;
+
+  // define physics variables
+  Real density_gas, density_dust;  // fluid mass densities
+  Real number_density;             // gas number density
+  Real mu = 0.6;                   // mean molecular weight
+
+  // define integration variables
+  Real dd_dt;          // instantaneous rate of change in dust density
+  Real dd;             // change in dust density at current timestep
+  Real dd_max = 0.01;  // allowable percentage of dust density increase
+  Real dt_sub;         // refined timestep
+
+  if (id_x >= is && id_x < ie && id_y >= js && id_y < je && id_z >= ks && id_z < ke) {
+    // get conserved quanitites
+    density_gas  = dev_conserved[id + n_cells * grid_enum::density];
+    density_dust = dev_conserved[id + n_cells * grid_enum::dust_density];
+
+    // convert mass density to number density
+    number_density = density_gas * DENSITY_UNIT / (mu * MP);
+
+    // Compute the temperature
+  #ifdef DE
+    Real const gas_energy  = dev_conserved[id + n_cells * grid_enum::GasEnergy];
+    Real const temperature = hydro_utilities::Calc_Temp_DE(gas_energy, gamma, number_density);
+  #else  // DE is not enabled
+    Real const energy     = dev_conserved[id + n_cells * grid_enum::Energy];
+    Real const momentum_x = dev_conserved[id + n_cells * grid_enum::momentum_x];
+    Real const momentum_y = dev_conserved[id + n_cells * grid_enum::momentum_y];
+    Real const momentum_z = dev_conserved[id + n_cells * grid_enum::momentum_z];
+
+    #ifdef MHD
+    auto const [magnetic_x, magnetic_y, magnetic_z] =
+        mhd::utils::cellCenteredMagneticFields(C.host, id, xid, yid, zid, H.n_cells, H.nx, H.ny);
+    Real const temperature =
+        hydro_utilities::Calc_Temp_Conserved(energy, density_gas, momentum_x, momentum_y, momentum_z, gamma,
+                                             number_density, magnetic_x, magnetic_y, magnetic_z);
+    #else   // MHD is not defined
+    Real const temperature = hydro_utilities::Calc_Temp_Conserved(energy, density_gas, momentum_x, momentum_y,
+                                                                  momentum_z, gamma, number_density);
+    #endif  // MHD
+  #endif    // DE
+
+    Real tau_sp = Calc_Sputtering_Timescale(number_density, temperature, grain_radius) /
+                  TIME_UNIT;  // sputtering timescale, kyr (sim units)
+
+    dd_dt = Calc_dd_dt(density_dust, tau_sp);  // rate of change in dust density at current timestep
+    dd    = dd_dt * dt;                        // change in dust density at current timestep
+
+    // ensure that dust density is not changing too rapidly
+    while (dd / density_dust > dd_max) {
+      dt_sub = dd_max * density_dust / dd_dt;
+      density_dust += dt_sub * dd_dt;
+      dt -= dt_sub;
+      dd_dt = Calc_dd_dt(density_dust, tau_sp);
+      dd    = dt * dd_dt;
+    }
+
+    // update dust density
+    density_dust += dd;
+
+    dev_conserved[id + n_cells * grid_enum::dust_density] = density_dust;
+  }
+}
+
+// McKinnon et al. (2017) sputtering timescale
+__device__ __host__ Real Calc_Sputtering_Timescale(Real number_density, Real temperature, Real grain_radius)
+{
+  Real a             = grain_radius;  // dust grain size in units of 0.1 micrometers
+  Real temperature_0 = 2e6;           // temp above which the sputtering rate is ~constant in K
+  Real omega         = 2.5;           // controls the low-temperature scaling of the sputtering rate
+  Real A             = 5.3618e15;     // 0.17 Gyr in s
+
+  number_density /= (6e-4);  // gas number density in units of 10^-27 g/cm^3
+
+  // sputtering timescale, s
+  Real tau_sp = A * (a / number_density) * (pow(temperature_0 / temperature, omega) + 1);
+
+  return tau_sp;
+}
+
+// McKinnon et al. (2017) sputtering model
+__device__ __host__ Real Calc_dd_dt(Real density_dust, Real tau_sp) { return -density_dust / (tau_sp / 3); }
+
+#endif  // DUST
diff --git a/src/dust/dust_cuda.h b/src/dust/dust_cuda.h
new file mode 100644
index 000000000..212901e8a
--- /dev/null
+++ b/src/dust/dust_cuda.h
@@ -0,0 +1,70 @@
+/*!
+ * \file dust_cuda.h
+ * \author Helena Richie (helenarichie@pitt.edu)
+ * \brief Contains the declaration for the kernel that updates the dust density scalar in dev_conserved.
+ *
+ */
+
+#ifdef DUST
+
+  #ifndef DUST_CUDA_H
+    #define DUST_CUDA_H
+
+    #include <math.h>
+
+    #include "../global/global.h"
+    #include "../utils/gpu.hpp"
+
+/*!
+ * \brief Launch the dust kernel.
+ *
+ * \param[in,out] dev_conserved The device conserved variable array.
+ * \param[in] nx Number of cells in the x-direction
+ * \param[in] ny Number of cells in the y-direction
+ * \param[in] nz Number of cells in the z-direction
+ * \param[in] n_ghost Number of ghost cells
+ * \param[in] n_fields Number of fields in dev_conserved
+ * \param[in] dt Simulation timestep
+ * \param[in] gamma Specific heat ratio
+ */
+void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma,
+                 Real grain_radius);
+
+/*!
+ * \brief Compute the change in dust density for a cell and update its value in dev_conserved.
+ *
+ * \param[in,out] dev_conserved The device conserved variable array. The dust field is updated in this function. If dual
+ * energy is turned on, then the dual energy field is updated, as well.
+ * \param[in] nx Number of cells in the x-direction
+ * \param[in] ny Number of cells in the y-direction
+ * \param[in] nz Number of cells in the z-direction
+ * \param[in] n_ghost Number of ghost cells
+ * \param[in] n_fields Number of fields in dev_conserved
+ * \param[in] dt Simulation timestep
+ * \param[in] gamma Specific heat ratio
+ */
+__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma,
+                            Real grain_radius);
+
+/*!
+ * \brief Compute the sputtering timescale based on a cell's density and temperature.
+ *
+ * \param[in] number_density Gas number density in cm^-3
+ * \param[in] temperature Gas temperature in K
+ *
+ * \return Real Sputtering timescale in seconds (McKinnon et al. 2017)
+ */
+__device__ __host__ Real Calc_Sputtering_Timescale(Real number_density, Real temperature, Real grain_radius);
+
+/*!
+ * \brief Compute the rate of change in dust density based on the current dust density and sputtering timescale.
+ *
+ * \param[in] density_dust Dust mass density in M_sun/kpc^3
+ * \param[in] tau_sp Sputtering timescale in kyr
+ *
+ * \return Real Dust density rate of change (McKinnon et al. 2017)
+ */
+__device__ __host__ Real Calc_dd_dt(Real density_dust, Real tau_sp);
+
+  #endif  // DUST_CUDA_H
+#endif    // DUST
\ No newline at end of file
diff --git a/src/dust/dust_cuda_tests.cpp b/src/dust/dust_cuda_tests.cpp
new file mode 100644
index 000000000..5b59b2dc0
--- /dev/null
+++ b/src/dust/dust_cuda_tests.cpp
@@ -0,0 +1,72 @@
+/*!
+ * \file dust_cuda_tests.cpp
+ * \author Helena Richie (helenarichie@gmail.com)
+ * \brief Tests for dust model functions.
+ */
+
+// STL Includes
+#include <math.h>
+
+#include <iostream>
+#include <string>
+
+// External Includes
+#include <gtest/gtest.h>  // Include GoogleTest and related libraries/headers
+
+// Local Includes
+#include "../dust/dust_cuda.h"
+#include "../global/global_cuda.h"
+#include "../utils/gpu.hpp"
+#include "../utils/testing_utilities.h"
+
+#ifdef DUST
+
+TEST(tDUSTTestSputteringTimescale, CorrectInputExpectCorrectOutput)
+{
+  // Parameters
+  Real YR_IN_S                     = 3.154e7;
+  Real const k_test_number_density = 1;
+  Real const k_test_temperature    = pow(10, 5.0);
+  Real const k_test_grain_radius   = 1;
+  Real const k_fiducial_num        = 182565146.96398282;
+
+  Real test_num =
+      Calc_Sputtering_Timescale(k_test_number_density, k_test_temperature, k_test_grain_radius) / YR_IN_S;  // yr
+
+  double abs_diff;
+  int64_t ulps_diff;
+
+  bool is_true;
+
+  is_true = testing_utilities::nearlyEqualDbl(k_fiducial_num, test_num, abs_diff, ulps_diff);
+
+  EXPECT_TRUE(is_true) << "The fiducial value is:       " << k_fiducial_num << std::endl
+                       << "The test value is:           " << test_num << std::endl
+                       << "The absolute difference is:  " << abs_diff << std::endl
+                       << "The ULP difference is:       " << ulps_diff << std::endl;
+}
+
+TEST(tDUSTTestSputteringGrowthRate, CorrectInputExpectCorrectOutput)
+{
+  // Parameters
+  Real YR_IN_S                   = 3.154e7;
+  Real const k_test_tau_sp       = 0.17e6;                // kyr
+  Real const k_test_density_dust = 1e-26 / DENSITY_UNIT;  // sim units
+  Real const k_fiducial_num      = -2.6073835738056728;
+
+  Real test_num = Calc_dd_dt(k_test_density_dust, k_test_tau_sp);
+
+  double abs_diff;
+  int64_t ulps_diff;
+
+  bool is_true;
+
+  is_true = testing_utilities::nearlyEqualDbl(k_fiducial_num, test_num, abs_diff, ulps_diff);
+
+  EXPECT_TRUE(is_true) << "The fiducial value is:       " << k_fiducial_num << std::endl
+                       << "The test value is:           " << test_num << std::endl
+                       << "The absolute difference is:  " << abs_diff << std::endl
+                       << "The ULP difference is:       " << ulps_diff << std::endl;
+}
+
+#endif  // DUST
\ No newline at end of file
diff --git a/src/global/global.cpp b/src/global/global.cpp
index 1f6a5cbfa..64eac0d5b 100644
--- a/src/global/global.cpp
+++ b/src/global/global.cpp
@@ -1,415 +1,493 @@
 /*  \file global.cpp
  *  \brief Global function definitions.*/
 
+#include "../global/global.h"
 
+#include <ctype.h>
 #include <math.h>
-#include <sys/time.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/time.h>
+
 #include <set>
-#include <ctype.h>
-#include "../global/global.h"
-#include "../io/io.h" //defines chprintf
+
+#include "../io/io.h"                 //defines chprintf
+#include "../utils/error_handling.h"  // defines ASSERT
 
 /* Global variables */
-Real gama; // Ratio of specific heats
-Real C_cfl; // CFL number
+Real gama;   // Ratio of specific heats
+Real C_cfl;  // CFL number
 
 #ifdef PARTICLES
-#ifdef MPI_CHOLLA
+  #ifdef MPI_CHOLLA
 // Constants for the inital size of the buffers for particles transfer
 // and the number of data transferred for each particle
 int N_PARTICLES_TRANSFER;
 int N_DATA_PER_PARTICLE_TRANSFER;
+  #endif
 #endif
-#endif
-
 
 /*! \fn void Set_Gammas(Real gamma_in)
  *  \brief Set gamma values for Riemann solver */
 void Set_Gammas(Real gamma_in)
 {
-    //set gamma
-    gama = gamma_in;
-
+  // set gamma
+  gama = gamma_in;
+  CHOLLA_ASSERT(gama > 1.0, "Gamma must be greater than one.");
 }
 
-
-/*! \fn double get_time(void)
+/*! \fn double Get_Time(void)
  *  \brief Returns the current clock time. */
-double get_time(void)
+double Get_Time(void)
 {
   struct timeval timer;
-  gettimeofday(&timer,NULL);
-  return timer.tv_sec + 1.0e-6*timer.tv_usec;
+  gettimeofday(&timer, NULL);
+  return timer.tv_sec + 1.0e-6 * timer.tv_usec;
 }
 
-/*! \fn int sgn
+/*! \fn int Sgn
  *  \brief Mathematical sign function. Returns sign of x. */
-int sgn(Real x)
+int Sgn(Real x)
 {
-    if (x < 0) return -1;
-    else return 1;
+  if (x < 0) {
+    return -1;
+  } else {
+    return 1;
+  }
 }
 
-#ifndef CUDA
-/*! \fn Real calc_eta(Real cW[], Real gamma)
- *  \brief Calculate the eta value for the H correction. */
-Real calc_eta(Real cW[], Real gamma)
-{
-  Real pl, pr, al, ar;
-
-  pl = (cW[8] - 0.5*(cW[2]*cW[2] + cW[4]*cW[4] + cW[6]*cW[6])/cW[0]) * (gamma-1.0);
-  pl = fmax(pl, TINY_NUMBER);
-  pr = (cW[9] - 0.5*(cW[3]*cW[3] + cW[5]*cW[5] + cW[7]*cW[7])/cW[1]) * (gamma-1.0);
-  pr = fmax(pr, TINY_NUMBER);
-
-  al = sqrt(gamma*pl/cW[0]);
-  ar = sqrt(gamma*pr/cW[1]);
+// global mpi-related variables (they are declared here because they are initialized even when
+// the MPI_CHOLLA variable is not defined)
 
-  return 0.5*fabs((cW[3]/cW[1] + ar) - (cW[2]/cW[0]-al));
+int procID; /*process rank*/
+int nproc;  /*number of processes in global comm*/
+int root;   /*rank of root process*/
 
+/* Used when MPI_CHOLLA is not defined to initialize a subset of the global mpi-related variables
+ * that still meaningful in non-mpi simulations.
+ */
+void Init_Global_Parallel_Vars_No_MPI()
+{
+#ifdef MPI_CHOLLA
+  CHOLLA_ERROR("This function should not be executed when compiled with MPI");
+#endif
+  procID = 0;
+  nproc  = 1;
+  root   = 0;
 }
-#endif //NO CUDA
-
 
-/*! \fn char trim(char *s)
+/*! \fn char Trim(char *s)
  *  \brief Gets rid of trailing and leading whitespace. */
-char *trim (char * s)
+char *Trim(char *s)
 {
   /* Initialize start, end pointers */
-  char *s1 = s, *s2 = &s[strlen (s) - 1];
+  char *s1 = s, *s2 = &s[strlen(s) - 1];
 
   /* Trim and delimit right side */
-  while ( (isspace (*s2)) && (s2 >= s1) )
+  while ((isspace(*s2)) && (s2 >= s1)) {
     s2--;
-  *(s2+1) = '\0';
+  }
+  *(s2 + 1) = '\0';
 
   /* Trim left side */
-  while ( (isspace (*s1)) && (s1 < s2) )
+  while ((isspace(*s1)) && (s1 < s2)) {
     s1++;
+  }
 
   /* Copy finished string */
-  strcpy (s, s1);
+  strcpy(s, s1);
   return s;
 }
 
-const std::set<const char*> optionalParams = {"flag_delta", "ddelta_dt", "n_delta",
-       "Lz" , "Lx" , "phi" , "theta", "delta", "nzr", "nxr", "H0", "Omega_M", "Omega_L",
-       "Init_redshift", "End_redshift", "tile_length", "n_proc_x", "n_proc_y", "n_proc_z" };
+// NOLINTNEXTLINE(cert-err58-cpp)
+const std::set<const char *> optionalParams = {
+    "flag_delta",   "ddelta_dt",   "n_delta",  "Lz",       "Lx",      "phi",     "theta",
+    "delta",        "nzr",         "nxr",      "H0",       "Omega_M", "Omega_L", "Init_redshift",
+    "End_redshift", "tile_length", "n_proc_x", "n_proc_y", "n_proc_z"};
 
-/*! \fn int is_param_valid(char *name);
- * \brief Verifies that a param is valid (even if not needed).  Avoids "warnings" in output. */
-int is_param_valid(const char* param_name) {
-  for (auto it=optionalParams.begin(); it != optionalParams.end(); ++it) {
-      if (strcmp(param_name, *it) == 0) return 1;
+/*! \fn int Is_Param_Valid(char *name);
+ * \brief Verifies that a param is valid (even if not needed).  Avoids
+ * "warnings" in output. */
+int Is_Param_Valid(const char *param_name)
+{
+  // for (auto optionalParam = optionalParams.begin(); optionalParam != optionalParams.end(); ++optionalParam) {
+  for (const auto *optionalParam : optionalParams) {
+    if (strcmp(param_name, optionalParam) == 0) {
+      return 1;
+    }
   }
   return 0;
 }
 
-void parse_param(char *name,char *value, struct parameters *parms);
+void Parse_Param(char *name, char *value, struct Parameters *parms);
 
-
-/*! \fn void parse_params(char *param_file, struct parameters * parms);
+/*! \fn void Parse_Params(char *param_file, struct Parameters * parms);
  *  \brief Reads the parameters in the given file into a structure. */
-void parse_params (char *param_file, struct parameters * parms, int argc, char** argv)
+void Parse_Params(char *param_file, struct Parameters *parms, int argc, char **argv)
 {
   int buf;
   char *s, buff[256];
-  FILE *fp = fopen (param_file, "r");
-  if (fp == NULL)
-  {
+  FILE *fp = fopen(param_file, "r");
+  if (fp == NULL) {
     chprintf("Exiting at file %s line %d: failed to read param file %s \n", __FILE__, __LINE__, param_file);
     exit(1);
     return;
   }
-  // set default hydro file output parameter
-  parms->n_hydro=1;
-  parms->n_particle=1;
-  parms->n_slice=1;
-  parms->n_projection=1;
-  parms->n_rotated_projection=1;
-
-#ifdef ROTATED_PROJECTION
-  //initialize rotation parameters to zero
-  parms->delta = 0;
-  parms->theta = 0;
-  parms->phi   = 0;
-  parms->n_delta = 0;
-  parms->ddelta_dt = 0;
-  parms->flag_delta = 0;
-#endif /*ROTATED_PROJECTION*/
 
 #ifdef COSMOLOGY
-//Initialize file name as an empty string
-parms->scale_outputs_file[0] = '\0';
+  // Initialize file name as an empty string
+  parms->scale_outputs_file[0] = '\0';
 #endif
 
-
   /* Read next line */
-  while ((s = fgets (buff, sizeof buff, fp)) != NULL)
-  {
+  while ((s = fgets(buff, sizeof buff, fp)) != NULL) {
     /* Skip blank lines and comments */
-    if (buff[0] == '\n' || buff[0] == '#' || buff[0] == ';')
+    if (buff[0] == '\n' || buff[0] == '#' || buff[0] == ';') {
       continue;
+    }
 
     /* Parse name/value pair from line */
     char name[MAXLEN], value[MAXLEN];
-    s = strtok (buff, "=");
-    if (s==NULL)
+    s = strtok(buff, "=");
+    if (s == NULL) {
       continue;
-    else
-      strncpy (name, s, MAXLEN);
-    s = strtok (NULL, "=");
-    if (s==NULL)
+    } else {
+      strncpy(name, s, MAXLEN);
+    }
+    s = strtok(NULL, "=");
+    if (s == NULL) {
       continue;
-    else
-      strncpy (value, s, MAXLEN);
-    trim (value);
-    parse_param(name,value,parms);
+    } else {
+      strncpy(value, s, MAXLEN);
+    }
+    Trim(value);
+    Parse_Param(name, value, parms);
   }
   /* Close file */
-  fclose (fp);
+  fclose(fp);
 
   // Parse overriding args from command line
   for (int i = 0; i < argc; ++i) {
     char name[MAXLEN], value[MAXLEN];
-    s = strtok (argv[i], "=");
-    if (s==NULL)
+    s = strtok(argv[i], "=");
+    if (s == NULL) {
       continue;
-    else
-      strncpy (name, s, MAXLEN);
-    s = strtok (NULL, "=");
-    if (s==NULL)
+    } else {
+      strncpy(name, s, MAXLEN);
+    }
+    s = strtok(NULL, "=");
+    if (s == NULL) {
       continue;
-    else
-      strncpy (value, s, MAXLEN);
-    parse_param(name,value,parms);
-    chprintf("Override with %s=%s\n",name,value);
-
+    } else {
+      strncpy(value, s, MAXLEN);
+    }
+    Parse_Param(name, value, parms);
+    chprintf("Override with %s=%s\n", name, value);
   }
 }
 
-/*! \fn void parse_param(char *name,char *value, struct parameters *parms);
+/*! \fn void Parse_Param(char *name,char *value, struct Parameters *parms);
  *  \brief Parses and sets a single param based on name and value. */
-void parse_param(char *name,char *value, struct parameters *parms){
+void Parse_Param(char *name, char *value, struct Parameters *parms)
+{
   /* Copy into correct entry in parameters struct */
-  if (strcmp(name, "nx")==0)
+  if (strcmp(name, "nx") == 0) {
     parms->nx = atoi(value);
-  else if (strcmp(name, "ny")==0)
+  } else if (strcmp(name, "ny") == 0) {
     parms->ny = atoi(value);
-  else if (strcmp(name, "nz")==0)
+  } else if (strcmp(name, "nz") == 0) {
     parms->nz = atoi(value);
-  else if (strcmp(name, "tout")==0)
+#ifdef STATIC_GRAV
+  } else if (strcmp(name, "custom_grav") == 0) {
+    parms->custom_grav = atoi(value);
+#endif
+  } else if (strcmp(name, "tout") == 0) {
     parms->tout = atof(value);
-  else if (strcmp(name, "outstep")==0)
+  } else if (strcmp(name, "outstep") == 0) {
     parms->outstep = atof(value);
-  else if (strcmp(name, "n_steps_output")==0)
+  } else if (strcmp(name, "n_steps_output") == 0) {
     parms->n_steps_output = atoi(value);
-  else if (strcmp(name, "gamma")==0)
+  } else if (strcmp(name, "gamma") == 0) {
     parms->gamma = atof(value);
-  else if (strcmp(name, "init")==0)
-    strncpy (parms->init, value, MAXLEN);
-  else if (strcmp(name, "nfile")==0)
+  } else if (strcmp(name, "init") == 0) {
+    strncpy(parms->init, value, MAXLEN);
+  } else if (strcmp(name, "nfile") == 0) {
     parms->nfile = atoi(value);
-  else if (strcmp(name, "n_hydro")==0)
+  } else if (strcmp(name, "n_hydro") == 0) {
     parms->n_hydro = atoi(value);
-  else if (strcmp(name, "n_particle")==0)
+  } else if (strcmp(name, "n_particle") == 0) {
     parms->n_particle = atoi(value);
-  else if (strcmp(name, "n_projection")==0)
+  } else if (strcmp(name, "n_projection") == 0) {
     parms->n_projection = atoi(value);
-  else if (strcmp(name, "n_rotated_projection")==0)
+  } else if (strcmp(name, "n_rotated_projection") == 0) {
     parms->n_rotated_projection = atoi(value);
-  else if (strcmp(name, "n_slice")==0)
+  } else if (strcmp(name, "n_slice") == 0) {
     parms->n_slice = atoi(value);
-  else if (strcmp(name, "n_out_float32")==0)
+  } else if (strcmp(name, "n_out_float32") == 0) {
     parms->n_out_float32 = atoi(value);
-  else if (strcmp(name, "out_float32_density")==0)
+  } else if (strcmp(name, "out_float32_density") == 0) {
     parms->out_float32_density = atoi(value);
-  else if (strcmp(name, "out_float32_momentum_x")==0)
+  } else if (strcmp(name, "out_float32_momentum_x") == 0) {
     parms->out_float32_momentum_x = atoi(value);
-  else if (strcmp(name, "out_float32_momentum_y")==0)
+  } else if (strcmp(name, "out_float32_momentum_y") == 0) {
     parms->out_float32_momentum_y = atoi(value);
-  else if (strcmp(name, "out_float32_momentum_z")==0)
+  } else if (strcmp(name, "out_float32_momentum_z") == 0) {
     parms->out_float32_momentum_z = atoi(value);
-  else if (strcmp(name, "out_float32_Energy")==0)
+  } else if (strcmp(name, "out_float32_Energy") == 0) {
     parms->out_float32_Energy = atoi(value);
 #ifdef DE
-  else if (strcmp(name, "out_float32_GasEnergy")==0)
+  } else if (strcmp(name, "out_float32_GasEnergy") == 0) {
     parms->out_float32_GasEnergy = atoi(value);
-#endif // DE
+#endif  // DE
 #ifdef MHD
-  else if (strcmp(name, "out_float32_magnetic_x")==0)
+  } else if (strcmp(name, "out_float32_magnetic_x") == 0) {
     parms->out_float32_magnetic_x = atoi(value);
-  else if (strcmp(name, "out_float32_magnetic_y")==0)
+  } else if (strcmp(name, "out_float32_magnetic_y") == 0) {
     parms->out_float32_magnetic_y = atoi(value);
-  else if (strcmp(name, "out_float32_magnetic_z")==0)
+  } else if (strcmp(name, "out_float32_magnetic_z") == 0) {
     parms->out_float32_magnetic_z = atoi(value);
-#endif // MHD
-  else if (strcmp(name, "xmin")==0)
+#endif  // MHD
+  } else if (strcmp(name, "output_always") == 0) {
+    int tmp = atoi(value);
+    // In this case the CHOLLA_ASSERT macro runs into issuse with the readability-simplify-boolean-expr clang-tidy check
+    // due to some weird macro expansion stuff. That check has been disabled here for now but in clang-tidy 18 the
+    // IgnoreMacro option should be used instead.
+    // NOLINTNEXTLINE(readability-simplify-boolean-expr)
+    CHOLLA_ASSERT((tmp == 0) or (tmp == 1), "output_always must be 1 or 0.");
+    parms->output_always = tmp;
+  } else if (strcmp(name, "legacy_flat_outdir") == 0) {
+    int tmp = atoi(value);
+    CHOLLA_ASSERT((tmp == 0) or (tmp == 1), "legacy_flat_outdir must be 1 or 0.");
+    parms->legacy_flat_outdir = tmp;
+  } else if (strcmp(name, "xmin") == 0) {
     parms->xmin = atof(value);
-  else if (strcmp(name, "ymin")==0)
+  } else if (strcmp(name, "ymin") == 0) {
     parms->ymin = atof(value);
-  else if (strcmp(name, "zmin")==0)
+  } else if (strcmp(name, "zmin") == 0) {
     parms->zmin = atof(value);
-  else if (strcmp(name, "xlen")==0)
+  } else if (strcmp(name, "xlen") == 0) {
     parms->xlen = atof(value);
-  else if (strcmp(name, "ylen")==0)
+  } else if (strcmp(name, "ylen") == 0) {
     parms->ylen = atof(value);
-  else if (strcmp(name, "zlen")==0)
+  } else if (strcmp(name, "zlen") == 0) {
     parms->zlen = atof(value);
-  else if (strcmp(name, "xl_bcnd")==0)
+  } else if (strcmp(name, "xl_bcnd") == 0) {
     parms->xl_bcnd = atoi(value);
-  else if (strcmp(name, "xu_bcnd")==0)
+  } else if (strcmp(name, "xu_bcnd") == 0) {
     parms->xu_bcnd = atoi(value);
-  else if (strcmp(name, "yl_bcnd")==0)
+  } else if (strcmp(name, "yl_bcnd") == 0) {
     parms->yl_bcnd = atoi(value);
-  else if (strcmp(name, "yu_bcnd")==0)
+  } else if (strcmp(name, "yu_bcnd") == 0) {
     parms->yu_bcnd = atoi(value);
-  else if (strcmp(name, "zl_bcnd")==0)
+  } else if (strcmp(name, "zl_bcnd") == 0) {
     parms->zl_bcnd = atoi(value);
-  else if (strcmp(name, "zu_bcnd")==0)
+  } else if (strcmp(name, "zu_bcnd") == 0) {
     parms->zu_bcnd = atoi(value);
-  else if (strcmp(name, "custom_bcnd")==0)
-    strncpy (parms->custom_bcnd, value, MAXLEN);
-  else if (strcmp(name, "outdir")==0)
-    strncpy (parms->outdir, value, MAXLEN);
-  else if (strcmp(name, "indir")==0)
-    strncpy (parms->indir, value, MAXLEN);
-  else if (strcmp(name, "rho")==0)
+  } else if (strcmp(name, "custom_bcnd") == 0) {
+    strncpy(parms->custom_bcnd, value, MAXLEN);
+  } else if (strcmp(name, "outdir") == 0) {
+    strncpy(parms->outdir, value, MAXLEN);
+  } else if (strcmp(name, "indir") == 0) {
+    strncpy(parms->indir, value, MAXLEN);
+  } else if (strcmp(name, "rho") == 0) {
     parms->rho = atof(value);
-  else if (strcmp(name, "vx")==0)
+  } else if (strcmp(name, "vx") == 0) {
     parms->vx = atof(value);
-  else if (strcmp(name, "vy")==0)
+  } else if (strcmp(name, "vy") == 0) {
     parms->vy = atof(value);
-  else if (strcmp(name, "vz")==0)
+  } else if (strcmp(name, "vz") == 0) {
     parms->vz = atof(value);
-  else if (strcmp(name, "P")==0)
+  } else if (strcmp(name, "P") == 0) {
     parms->P = atof(value);
-  else if (strcmp(name, "Bx")==0)
+  } else if (strcmp(name, "Bx") == 0) {
     parms->Bx = atof(value);
-  else if (strcmp(name, "By")==0)
+  } else if (strcmp(name, "By") == 0) {
     parms->By = atof(value);
-  else if (strcmp(name, "Bz")==0)
+  } else if (strcmp(name, "Bz") == 0) {
     parms->Bz = atof(value);
-  else if (strcmp(name, "A")==0)
+  } else if (strcmp(name, "A") == 0) {
     parms->A = atof(value);
-  else if (strcmp(name, "rho_l")==0)
+  } else if (strcmp(name, "rho_l") == 0) {
     parms->rho_l = atof(value);
-  else if (strcmp(name, "vx_l")==0)
+  } else if (strcmp(name, "vx_l") == 0) {
     parms->vx_l = atof(value);
-  else if (strcmp(name, "vy_l")==0)
+  } else if (strcmp(name, "vy_l") == 0) {
     parms->vy_l = atof(value);
-  else if (strcmp(name, "vz_l")==0)
+  } else if (strcmp(name, "vz_l") == 0) {
     parms->vz_l = atof(value);
-  else if (strcmp(name, "P_l")==0)
+  } else if (strcmp(name, "P_l") == 0) {
     parms->P_l = atof(value);
-  else if (strcmp(name, "Bx_l")==0)
+  } else if (strcmp(name, "Bx_l") == 0) {
     parms->Bx_l = atof(value);
-  else if (strcmp(name, "By_l")==0)
+  } else if (strcmp(name, "By_l") == 0) {
     parms->By_l = atof(value);
-  else if (strcmp(name, "Bz_l")==0)
+  } else if (strcmp(name, "Bz_l") == 0) {
     parms->Bz_l = atof(value);
-  else if (strcmp(name, "rho_r")==0)
+  } else if (strcmp(name, "rho_r") == 0) {
     parms->rho_r = atof(value);
-  else if (strcmp(name, "vx_r")==0)
+  } else if (strcmp(name, "vx_r") == 0) {
     parms->vx_r = atof(value);
-  else if (strcmp(name, "vy_r")==0)
+  } else if (strcmp(name, "vy_r") == 0) {
     parms->vy_r = atof(value);
-  else if (strcmp(name, "vz_r")==0)
+  } else if (strcmp(name, "vz_r") == 0) {
     parms->vz_r = atof(value);
-  else if (strcmp(name, "P_r")==0)
+  } else if (strcmp(name, "P_r") == 0) {
     parms->P_r = atof(value);
-  else if (strcmp(name, "Bx_r")==0)
+  } else if (strcmp(name, "Bx_r") == 0) {
     parms->Bx_r = atof(value);
-  else if (strcmp(name, "By_r")==0)
+  } else if (strcmp(name, "By_r") == 0) {
     parms->By_r = atof(value);
-  else if (strcmp(name, "Bz_r")==0)
+  } else if (strcmp(name, "Bz_r") == 0) {
     parms->Bz_r = atof(value);
-  else if (strcmp(name, "diaph")==0)
+  } else if (strcmp(name, "diaph") == 0) {
     parms->diaph = atof(value);
+  } else if (strcmp(name, "rEigenVec_rho") == 0) {
+    parms->rEigenVec_rho = atof(value);
+  } else if (strcmp(name, "rEigenVec_MomentumX") == 0) {
+    parms->rEigenVec_MomentumX = atof(value);
+  } else if (strcmp(name, "rEigenVec_MomentumY") == 0) {
+    parms->rEigenVec_MomentumY = atof(value);
+  } else if (strcmp(name, "rEigenVec_MomentumZ") == 0) {
+    parms->rEigenVec_MomentumZ = atof(value);
+  } else if (strcmp(name, "rEigenVec_E") == 0) {
+    parms->rEigenVec_E = atof(value);
+  } else if (strcmp(name, "rEigenVec_Bx") == 0) {
+    parms->rEigenVec_Bx = atof(value);
+  } else if (strcmp(name, "rEigenVec_By") == 0) {
+    parms->rEigenVec_By = atof(value);
+  } else if (strcmp(name, "rEigenVec_Bz") == 0) {
+    parms->rEigenVec_Bz = atof(value);
+  } else if (strcmp(name, "pitch") == 0) {
+    parms->pitch = atof(value);
+  } else if (strcmp(name, "yaw") == 0) {
+    parms->yaw = atof(value);
+  } else if (strcmp(name, "polarization") == 0) {
+    parms->polarization = atof(value);
+  } else if (strcmp(name, "radius") == 0) {
+    parms->radius = atof(value);
+  } else if (strcmp(name, "P_blast") == 0) {
+    parms->P_blast = atof(value);
+  } else if (strcmp(name, "wave_length") == 0) {
+    parms->wave_length = atof(value);
 #ifdef PARTICLES
-  else if (strcmp(name, "prng_seed")==0)
+  } else if (strcmp(name, "prng_seed") == 0) {
     parms->prng_seed = atoi(value);
-#endif // PARTICLES
+#endif  // PARTICLES
+#ifdef SUPERNOVA
+  } else if (strcmp(name, "snr_filename") == 0) {
+    strncpy(parms->snr_filename, value, MAXLEN);
+#endif
 #ifdef ROTATED_PROJECTION
-  else if (strcmp(name, "nxr")==0)
+  } else if (strcmp(name, "nxr") == 0) {
     parms->nxr = atoi(value);
-  else if (strcmp(name, "nzr")==0)
+  } else if (strcmp(name, "nzr") == 0) {
     parms->nzr = atoi(value);
-  else if (strcmp(name, "delta")==0)
+  } else if (strcmp(name, "delta") == 0) {
     parms->delta = atof(value);
-  else if (strcmp(name, "theta")==0)
+  } else if (strcmp(name, "theta") == 0) {
     parms->theta = atof(value);
-  else if (strcmp(name, "phi")==0)
+  } else if (strcmp(name, "phi") == 0) {
     parms->phi = atof(value);
-  else if (strcmp(name, "Lx")==0)
-    parms->Lx  = atof(value);
-  else if (strcmp(name, "Lz")==0)
+  } else if (strcmp(name, "Lx") == 0) {
+    parms->Lx = atof(value);
+  } else if (strcmp(name, "Lz") == 0) {
     parms->Lz = atof(value);
-  else if (strcmp(name, "n_delta")==0)
+  } else if (strcmp(name, "n_delta") == 0) {
     parms->n_delta = atoi(value);
-  else if (strcmp(name, "ddelta_dt")==0)
+  } else if (strcmp(name, "ddelta_dt") == 0) {
     parms->ddelta_dt = atof(value);
-  else if (strcmp(name, "flag_delta")==0)
-    parms->flag_delta  = atoi(value);
+  } else if (strcmp(name, "flag_delta") == 0) {
+    parms->flag_delta = atoi(value);
 #endif /*ROTATED_PROJECTION*/
 #ifdef COSMOLOGY
-  else if (strcmp(name, "scale_outputs_file")==0)
-    strncpy (parms->scale_outputs_file, value, MAXLEN);
-  else if (strcmp(name, "Init_redshift")==0)
-    parms->Init_redshift  = atof(value);
-  else if (strcmp(name, "End_redshift")==0)
-    parms->End_redshift  = atof(value);
-  else if (strcmp(name, "H0")==0)
-    parms->H0  = atof(value);
-  else if (strcmp(name, "Omega_M")==0)
-    parms->Omega_M  = atof(value);
-  else if (strcmp(name, "Omega_L")==0)
-    parms->Omega_L  = atof(value);
-  else if (strcmp(name, "Omega_b")==0)
-    parms->Omega_b  = atof(value);
-#endif //COSMOLOGY
+  } else if (strcmp(name, "scale_outputs_file") == 0) {
+    strncpy(parms->scale_outputs_file, value, MAXLEN);
+  } else if (strcmp(name, "Init_redshift") == 0) {
+    parms->Init_redshift = atof(value);
+  } else if (strcmp(name, "End_redshift") == 0) {
+    parms->End_redshift = atof(value);
+  } else if (strcmp(name, "H0") == 0) {
+    parms->H0 = atof(value);
+  } else if (strcmp(name, "Omega_M") == 0) {
+    parms->Omega_M = atof(value);
+  } else if (strcmp(name, "Omega_L") == 0) {
+    parms->Omega_L = atof(value);
+  } else if (strcmp(name, "Omega_b") == 0) {
+    parms->Omega_b = atof(value);
+#endif  // COSMOLOGY
 #ifdef TILED_INITIAL_CONDITIONS
-  else if (strcmp(name, "tile_length")==0)
-    parms->tile_length  = atof(value);
-#endif //TILED_INITIAL_CONDITIONS
+  } else if (strcmp(name, "tile_length") == 0) {
+    parms->tile_length = atof(value);
+#endif  // TILED_INITIAL_CONDITIONS
 
 #ifdef SET_MPI_GRID
-  // Set the MPI Processes grid [n_proc_x, n_proc_y, n_proc_z]
-  else if (strcmp(name, "n_proc_x")==0)
-    parms->n_proc_x  = atoi(value);
-  else if (strcmp(name, "n_proc_y")==0)
-    parms->n_proc_y  = atoi(value);
-  else if (strcmp(name, "n_proc_z")==0)
-    parms->n_proc_z  = atoi(value);
+    // Set the MPI Processes grid [n_proc_x, n_proc_y, n_proc_z]
+  } else if (strcmp(name, "n_proc_x") == 0) {
+    parms->n_proc_x = atoi(value);
+  } else if (strcmp(name, "n_proc_y") == 0) {
+    parms->n_proc_y = atoi(value);
+  } else if (strcmp(name, "n_proc_z") == 0) {
+    parms->n_proc_z = atoi(value);
 #endif
-  else if (strcmp(name, "bc_potential_type")==0)
-    parms->bc_potential_type  = atoi(value);
+  } else if (strcmp(name, "bc_potential_type") == 0) {
+    parms->bc_potential_type = atoi(value);
 #ifdef CHEMISTRY_GPU
-    else if (strcmp(name, "UVB_rates_file")==0)
-      strncpy (parms->UVB_rates_file, value, MAXLEN);
+  } else if (strcmp(name, "UVB_rates_file") == 0) {
+    strncpy(parms->UVB_rates_file, value, MAXLEN);
 #endif
 #ifdef COOLING_GRACKLE
-  else if (strcmp(name, "UVB_rates_file")==0)
-    strncpy (parms->UVB_rates_file, value, MAXLEN);
+  } else if (strcmp(name, "UVB_rates_file") == 0) {
+    strncpy(parms->UVB_rates_file, value, MAXLEN);
+#endif
+#ifdef TEMPERATURE_FLOOR
+  } else if (strcmp(name, "temperature_floor") == 0) {
+    parms->temperature_floor = atof(value);
+    if (parms->temperature_floor == 0) {
+      chprintf(
+          "WARNING: temperature floor is set to its default value (zero)! It can be set to a different value in the "
+          "input parameter file.\n");
+    }
+#endif
+#ifdef DENSITY_FLOOR
+  } else if (strcmp(name, "density_floor") == 0) {
+    parms->density_floor = atof(value);
+    if (parms->density_floor == 0) {
+      chprintf(
+          "WARNING: density floor is set to its default value (zero)! It can be set to a different value in the input "
+          "parameter file.\n");
+    }
+#endif
+#ifdef SCALAR_FLOOR
+  } else if (strcmp(name, "scalar_floor") == 0) {
+    parms->scalar_floor = atof(value);
+    if (parms->scalar_floor == 0) {
+      chprintf(
+          "WARNING: scalar floor is set to its default value (zero)! It can be set to a different value in the input "
+          "parameter file.\n");
+    }
 #endif
 #ifdef ANALYSIS
-  else if (strcmp(name, "analysis_scale_outputs_file")==0)
-    strncpy (parms->analysis_scale_outputs_file, value, MAXLEN);
-  else if (strcmp(name, "analysisdir")==0)
-    strncpy (parms->analysisdir, value, MAXLEN);
-  else if (strcmp(name, "lya_skewers_stride")==0)
-    parms->lya_skewers_stride  = atoi(value);
-  else if (strcmp(name, "lya_Pk_d_log_k")==0)
-    parms->lya_Pk_d_log_k  = atof(value);
+  } else if (strcmp(name, "analysis_scale_outputs_file") == 0) {
+    strncpy(parms->analysis_scale_outputs_file, value, MAXLEN);
+  } else if (strcmp(name, "analysisdir") == 0) {
+    strncpy(parms->analysisdir, value, MAXLEN);
+  } else if (strcmp(name, "lya_skewers_stride") == 0) {
+    parms->lya_skewers_stride = atoi(value);
+  } else if (strcmp(name, "lya_Pk_d_log_k") == 0) {
+    parms->lya_Pk_d_log_k = atof(value);
   #ifdef OUTPUT_SKEWERS
-  else if (strcmp(name, "skewersdir")==0)
-    strncpy (parms->skewersdir, value, MAXLEN);
+  } else if (strcmp(name, "skewersdir") == 0) {
+    strncpy(parms->skewersdir, value, MAXLEN);
   #endif
 #endif
-  else if (!is_param_valid(name))
-    chprintf ("WARNING: %s/%s: Unknown parameter/value pair!\n",
-	    name, value);
+#ifdef SCALAR
+  #ifdef DUST
+  } else if (strcmp(name, "grain_radius") == 0) {
+    parms->grain_radius = atoi(value);
+  #endif
+#endif
+  } else if (!Is_Param_Valid(name)) {
+    chprintf("WARNING: %s/%s: Unknown parameter/value pair!\n", name, value);
+  }
 }
diff --git a/src/global/global.h b/src/global/global.h
index 4e6d8eeb9..d2734131e 100644
--- a/src/global/global.h
+++ b/src/global/global.h
@@ -1,169 +1,154 @@
 /*! /file global.h
  *  /brief Declarations of global variables and functions. */
 
-
 #ifndef GLOBAL_H
 #define GLOBAL_H
 
-#ifdef COOLING_CPU
-#include <gsl/gsl_spline.h>
-#include <gsl/gsl_spline2d.h>
-#endif
+#include "../grid/grid_enum.h"  // defines NSCALARS
 
-#ifdef  PARTICLES
+#ifdef PARTICLES
   #include <cstdint>
-#endif  //PARTICLES
+#endif  // PARTICLES
 
-#if PRECISION==1
-#ifndef TYPEDEF_DEFINED_REAL
+#if PRECISION == 1
+  #ifndef TYPEDEF_DEFINED_REAL
 typedef float Real;
+  #endif
 #endif
-#endif
-#if PRECISION==2
-#ifndef TYPEDEF_DEFINED_REAL
+#if PRECISION == 2
+  #ifndef TYPEDEF_DEFINED_REAL
 typedef double Real;
-#endif
+  #endif
 #endif
 
-#define MAXLEN 2048
+#define MAXLEN      2048
 #define TINY_NUMBER 1.0e-20
-#define PI 3.141592653589793
-#define MP 1.672622e-24 // mass of proton, grams
-#define KB 1.380658e-16 // boltzmann constant, cgs
-//#define GN 6.67259e-8 // gravitational constant, cgs
-#define GN 4.49451e-18 // gravitational constant, kpc^3 / M_sun / kyr^2
-
-#define MYR 31.536e12 //Myears in secs
-#define KPC 3.086e16 // kpc in km
-#define G_COSMO 4.300927161e-06; // gravitational constant, kpc km^2 s^-2 Msun^-1
-#define MSUN_CGS 1.98847e33; //Msun in gr
-#define KPC_CGS 3.086e21;  //kpc in cm
-#define KM_CGS 1e5; //km in cm
-#define MH 1.67262171e-24 //Mass of hydrogen [g]   
-
-#define TIME_UNIT 3.15569e10 // 1 kyr in s
-#define LENGTH_UNIT 3.08567758e21 // 1 kpc in cm
-#define MASS_UNIT 1.98847e33 // 1 solar mass in grams
-#define DENSITY_UNIT (MASS_UNIT/(LENGTH_UNIT*LENGTH_UNIT*LENGTH_UNIT))
-#define VELOCITY_UNIT (LENGTH_UNIT/TIME_UNIT)
-#define ENERGY_UNIT (DENSITY_UNIT*VELOCITY_UNIT*VELOCITY_UNIT)
-#define PRESSURE_UNIT (DENSITY_UNIT*VELOCITY_UNIT*VELOCITY_UNIT)
-#define SP_ENERGY_UNIT (VELOCITY_UNIT*VELOCITY_UNIT)
-#define MAGNETIC_FIELD_UNIT (sqrt(MASS_UNIT/LENGTH_UNIT) / TIME_UNIT)
+#define MP          1.672622e-24  // mass of proton, grams
+#define KB          1.380658e-16  // boltzmann constant, cgs
+// #define GN 6.67259e-8 // gravitational constant, cgs
+#define GN  4.49451e-18  // gravitational constant, kpc^3 / M_sun / kyr^2
+#define C_L 0.306594593  // speed of light in kpc/kyr
+
+#define MYR      31.536e12         // Myears in secs
+#define KPC      3.086e16          // kpc in km
+#define G_COSMO  4.300927161e-06;  // gravitational constant, kpc km^2 s^-2 Msun^-1
+#define MSUN_CGS 1.98847e33;       // Msun in gr
+#define KPC_CGS  3.086e21;         // kpc in cm
+#define KM_CGS   1e5;              // km in cm
+#define MH       1.67262171e-24    // Mass of hydrogen [g]
+
+#define TIME_UNIT           3.15569e10     // 1 kyr in s
+#define LENGTH_UNIT         3.08567758e21  // 1 kpc in cm
+#define MASS_UNIT           1.98847e33     // 1 solar mass in grams
+#define DENSITY_UNIT        (MASS_UNIT / (LENGTH_UNIT * LENGTH_UNIT * LENGTH_UNIT))
+#define VELOCITY_UNIT       (LENGTH_UNIT / TIME_UNIT)
+#define ENERGY_UNIT         (DENSITY_UNIT * VELOCITY_UNIT * VELOCITY_UNIT)
+#define PRESSURE_UNIT       (DENSITY_UNIT * VELOCITY_UNIT * VELOCITY_UNIT)
+#define SP_ENERGY_UNIT      (VELOCITY_UNIT * VELOCITY_UNIT)
+#define MAGNETIC_FIELD_UNIT (sqrt(MASS_UNIT / LENGTH_UNIT) / TIME_UNIT)
 
 #define LOG_FILE_NAME "run_output.log"
 
-//Conserved Floor Values
-#define TEMP_FLOOR 1e-3 // in Kelvin
-#define DENS_FLOOR 1e-5 // in code units
+// Parameters for Enzo dual Energy Condition
+// - Prior to GH PR #356, DE_ETA_1 nominally had a value of 0.001 in all
+//   simulations (in practice, the value of DE_ETA_1 had minimal significance
+//   in those simulations). In PR #356, we revised the internal-energy
+//   synchronization to account for the value of DE_ETA_1. This was necessary
+//   for non-cosmology simulations.
+// - In Cosmological simulation, we set DE_ETA_1 to a large number (it doesn't
+//   really matter what, as long as its >=1) to maintain the older behavior
+// - In the future, we run tests and revisit the choice of DE_ETA_1 in
+//   cosmological simulations
+#ifdef COSMOLOGY
+  #define DE_ETA_1 10.0
+#else
+  #define DE_ETA_1 \
+    0.001  // Ratio of U to E for which  Internal Energy is used to compute the
+           // Pressure. This also affects when the Internal Energy is used for
+           // the update.
+#endif
 
-//Parameter for Enzo dual Energy Condition
-#define DE_ETA_1 0.001 //Ratio of U to E for which  Internal Energy is used to compute the Pressure
-#define DE_ETA_2 0.035 //Ratio of U to max(E_local) used to select which Internal Energy is used for the update.
+#define DE_ETA_2 \
+  0.035  // Ratio of U to max(E_local) used to select which Internal Energy is
+         // used for the update.
 
 // Maximum time step for cosmological simulations
-#define MAX_DELTA_A 0.001
+#define MAX_DELTA_A        0.001
 #define MAX_EXPANSION_RATE 0.01  // Limit delta(a)/a
 
-#ifdef COOLING_GRACKLE
-  #ifdef GRACKLE_METALS
-  #define NSCALARS 7
-  #else
-  #define NSCALARS 6
-  #endif // GRACKLE_METALS
-#elif CHEMISTRY_GPU
-  #define NSCALARS 6
-#else
-#ifdef SCALAR
-// Set Number of scalar fields when not using grackle
-#define NSCALARS 1
-#else
-#define NSCALARS 0
-#endif//SCALAR
-#endif//COOLING_GRACKLE
-
-#ifdef  MHD
+#ifdef MHD
   #define N_MHD_FIELDS 3
 #else
   #define N_MHD_FIELDS 0
-#endif  //MHD
+#endif  // MHD
 
 // Inital Chemistry fractions
-#define INITIAL_FRACTION_HI        0.75984603480
-#define INITIAL_FRACTION_HII       1.53965115054e-4
-#define INITIAL_FRACTION_HEI       0.24000000008
-#define INITIAL_FRACTION_HEII      9.59999999903e-15
-#define INITIAL_FRACTION_HEIII     9.59999999903e-18
-#define INITIAL_FRACTION_ELECTRON  1.53965115054e-4
-#define INITIAL_FRACTION_METAL     1.00000000000e-10
-
-
-//Default Particles Compiler Flags
+#define INITIAL_FRACTION_HI       0.75984603480
+#define INITIAL_FRACTION_HII      1.53965115054e-4
+#define INITIAL_FRACTION_HEI      0.24000000008
+#define INITIAL_FRACTION_HEII     9.59999999903e-15
+#define INITIAL_FRACTION_HEIII    9.59999999903e-18
+#define INITIAL_FRACTION_ELECTRON 1.53965115054e-4
+#define INITIAL_FRACTION_METAL    1.00000000000e-10
+
+// Default Particles Compiler Flags
 #define PARTICLES_LONG_INTS
 #define PARTICLES_KDK
 
-
 #ifdef GRAVITY
-#ifdef GRAVITY_5_POINTS_GRADIENT
-#ifdef PARTICLES
-#define N_GHOST_POTENTIAL 3 // 3 ghost cells are needed for 5 point gradient, ( one is for the CIC interpolation of the potential )
-#else
-#define N_GHOST_POTENTIAL 2 // 2 ghost cells are needed for 5 point gradient
-#endif //PARTICLES
-
-#else
-#ifdef PARTICLES
-#define N_GHOST_POTENTIAL 2 // 2 ghost cells are needed for 3 point gradient, ( one is for the CIC interpolation of the potential )
-#else
-#define N_GHOST_POTENTIAL 1 // 1 ghost cells are needed for 3 point gradient
-#endif //PARTICLES
-#endif //GRAVITY_5_POINTS_GRADIENT
+  #ifdef GRAVITY_5_POINTS_GRADIENT
+    #ifdef PARTICLES
+      #define N_GHOST_POTENTIAL \
+        3  // 3 ghost cells are needed for 5 point gradient, ( one is for the
+           // CIC interpolation of the potential )
+    #else
+      #define N_GHOST_POTENTIAL 2  // 2 ghost cells are needed for 5 point gradient
+    #endif                         // PARTICLES
 
+  #else
+    #ifdef PARTICLES
+      #define N_GHOST_POTENTIAL \
+        2  // 2 ghost cells are needed for 3 point gradient, ( one is for the
+           // CIC interpolation of the potential )
+    #else
+      #define N_GHOST_POTENTIAL 1  // 1 ghost cells are needed for 3 point gradient
+    #endif                         // PARTICLES
+  #endif                           // GRAVITY_5_POINTS_GRADIENT
 
 typedef long int grav_int_t;
 #endif
 
 #ifdef PARTICLES
-#ifdef PARTICLES_LONG_INTS
+  #ifdef PARTICLES_LONG_INTS
 typedef long int part_int_t;
-#else
+  #else
 typedef int part_int_t;
-#endif//PARTICLES_LONG_INTS
+  #endif  // PARTICLES_LONG_INTS
 
-#include <vector>
+  #include <vector>
 typedef std::vector<Real> real_vector_t;
 typedef std::vector<part_int_t> int_vector_t;
-#ifdef MPI_CHOLLA
+  #ifdef MPI_CHOLLA
 // Constants for the inital size of the buffers for particles transfer
 // and the number of data transferred for each particle
 extern int N_PARTICLES_TRANSFER;
 extern int N_DATA_PER_PARTICLE_TRANSFER;
-#endif//MPI_CHOLLA
-
-#ifdef AVERAGE_SLOW_CELLS
-#define SLOW_FACTOR 10
-#endif//AVERAGE_SLOW_CELLS
-
-#endif//PARTICLES
+  #endif  // MPI_CHOLLA
 
+  #ifdef AVERAGE_SLOW_CELLS
+    #define SLOW_FACTOR 10
+  #endif  // AVERAGE_SLOW_CELLS
 
-#define SIGN(a) ( ((a) < 0.) ? -1. : 1. )
+#endif  // PARTICLES
 
+#define SIGN(a) (((a) < 0.) ? -1. : 1.)
 
 /* Global variables */
-extern Real gama; // Ratio of specific heats
-extern Real C_cfl; // CFL number (0 - 0.5)
+extern Real gama;   // Ratio of specific heats
+extern Real C_cfl;  // CFL number (0 - 0.5)
 extern Real t_comm;
 extern Real t_other;
 
-#ifdef COOLING_CPU
-extern gsl_interp_accel *acc;
-extern gsl_interp_accel *xacc;
-extern gsl_interp_accel *yacc;
-extern gsl_spline *highT_C_spline;
-extern gsl_spline2d *lowT_C_spline;
-extern gsl_spline2d *lowT_H_spline;
-#endif
 #ifdef COOLING_GPU
 extern float *cooling_table;
 extern float *heating_table;
@@ -173,23 +158,29 @@ extern float *heating_table;
  *  \brief Set gamma values for Riemann solver. */
 extern void Set_Gammas(Real gamma_in);
 
-/*! \fn double get_time(void)
+/*! \fn double Get_Time(void)
  *  \brief Returns the current clock time. */
-extern double get_time(void);
+extern double Get_Time(void);
 
 /*! \fn int sgn
  *  \brief Mathematical sign function. Returns sign of x. */
-extern int sgn(Real x);
-
-#ifndef CUDA
-/*! \fn Real calc_eta(Real cW[], Real gamma)
- *  \brief Calculate the eta value for the H correction. */
-extern Real calc_eta(Real cW[], Real gamma);
-#endif
-
-
-struct parameters
-{
+extern int Sgn(Real x);
+
+/* Global variables for mpi (but they are also initialized to sensible defaults when not using mpi)
+ *
+ * It may make sense to move these back into mpi_routines (but reorganizing the ifdef statements
+ * would take some work). It may make sense to also put these into their own namespace.
+ */
+extern int procID; /*process rank*/
+extern int nproc;  /*number of processes executing simulation*/
+extern int root;   /*rank of root process*/
+
+/* Used when MPI_CHOLLA is not defined to initialize a subset of the global mpi-related variables
+ * that still meaningful in non-mpi simulations.
+ */
+void Init_Global_Parallel_Vars_No_MPI();
+
+struct Parameters {
   int nx;
   int ny;
   int nz;
@@ -199,24 +190,29 @@ struct parameters
   Real gamma;
   char init[MAXLEN];
   int nfile;
-  int n_hydro;
-  int n_particle;
-  int n_projection;
-  int n_rotated_projection;
-  int n_slice;
-  int n_out_float32=0;
-  int out_float32_density=0;
-  int out_float32_momentum_x=0;
-  int out_float32_momentum_y=0;
-  int out_float32_momentum_z=0;
-  int out_float32_Energy=0;
+  int n_hydro                = 1;
+  int n_particle             = 1;
+  int n_projection           = 1;
+  int n_rotated_projection   = 1;
+  int n_slice                = 1;
+  int n_out_float32          = 0;
+  int out_float32_density    = 0;
+  int out_float32_momentum_x = 0;
+  int out_float32_momentum_y = 0;
+  int out_float32_momentum_z = 0;
+  int out_float32_Energy     = 0;
 #ifdef DE
-  int out_float32_GasEnergy=0;
+  int out_float32_GasEnergy = 0;
+#endif
+  bool output_always      = false;
+  bool legacy_flat_outdir = false;
+#ifdef STATIC_GRAV
+  int custom_grav = 0;  // flag to set specific static gravity field
 #endif
 #ifdef MHD
-  int out_float32_magnetic_x=0;
-  int out_float32_magnetic_y=0;
-  int out_float32_magnetic_z=0;
+  int out_float32_magnetic_x = 0;
+  int out_float32_magnetic_y = 0;
+  int out_float32_magnetic_z = 0;
 #endif
   Real xmin;
   Real ymin;
@@ -230,7 +226,7 @@ struct parameters
   int yu_bcnd;
   int zl_bcnd;
   int zu_bcnd;
-#ifdef   MPI_CHOLLA
+#ifdef MPI_CHOLLA
   int xlg_bcnd;
   int xug_bcnd;
   int ylg_bcnd;
@@ -240,49 +236,67 @@ struct parameters
 #endif /*MPI_CHOLLA*/
   char custom_bcnd[MAXLEN];
   char outdir[MAXLEN];
-  char indir[MAXLEN]; //Folder to load Initial conditions from
-  Real rho;
-  Real vx;
-  Real vy;
-  Real vz;
-  Real P;
-  Real A;
-  Real Bx;
-  Real By;
-  Real Bz;
-  Real rho_l;
-  Real vx_l;
-  Real vy_l=0;
-  Real vz_l=0;
-  Real P_l;
-  Real Bx_l;
-  Real By_l;
-  Real Bz_l;
-  Real rho_r;
-  Real vx_r;
-  Real vy_r=0;
-  Real vz_r=0;
-  Real P_r;
-  Real Bx_r;
-  Real By_r;
-  Real Bz_r;
-  Real diaph;
+  char indir[MAXLEN];  // Folder to load Initial conditions from
+  Real rho                 = 0;
+  Real vx                  = 0;
+  Real vy                  = 0;
+  Real vz                  = 0;
+  Real P                   = 0;
+  Real A                   = 0;
+  Real Bx                  = 0;
+  Real By                  = 0;
+  Real Bz                  = 0;
+  Real rho_l               = 0;
+  Real vx_l                = 0;
+  Real vy_l                = 0;
+  Real vz_l                = 0;
+  Real P_l                 = 0;
+  Real Bx_l                = 0;
+  Real By_l                = 0;
+  Real Bz_l                = 0;
+  Real rho_r               = 0;
+  Real vx_r                = 0;
+  Real vy_r                = 0;
+  Real vz_r                = 0;
+  Real P_r                 = 0;
+  Real Bx_r                = 0;
+  Real By_r                = 0;
+  Real Bz_r                = 0;
+  Real diaph               = 0;
+  Real rEigenVec_rho       = 0;
+  Real rEigenVec_MomentumX = 0;
+  Real rEigenVec_MomentumY = 0;
+  Real rEigenVec_MomentumZ = 0;
+  Real rEigenVec_E         = 0;
+  Real rEigenVec_Bx        = 0;
+  Real rEigenVec_By        = 0;
+  Real rEigenVec_Bz        = 0;
+  Real pitch               = 0;
+  Real yaw                 = 0;
+  Real polarization        = 0;
+  Real radius              = 0;
+  Real P_blast             = 0;
+  Real wave_length         = 1.0;
 #ifdef PARTICLES
   // The random seed for particle simulations. With the default of 0 then a
   // machine dependent seed will be generated.
   std::uint_fast64_t prng_seed = 0;
-#endif // PARTICLES
+#endif  // PARTICLES
+#ifdef SUPERNOVA
+  char snr_filename[MAXLEN];
+#endif
 #ifdef ROTATED_PROJECTION
+  // initialize rotation parameters to zero
   int nxr;
   int nzr;
-  Real delta;
-  Real theta;
-  Real phi;
+  Real delta = 0;
+  Real theta = 0;
+  Real phi   = 0;
   Real Lx;
   Real Lz;
-  int n_delta;
-  Real ddelta_dt;
-  int flag_delta;
+  int n_delta    = 0;
+  Real ddelta_dt = 0;
+  int flag_delta = 0;
 #endif /*ROTATED_PROJECTION*/
 #ifdef COSMOLOGY
   Real H0;
@@ -291,11 +305,12 @@ struct parameters
   Real Omega_b;
   Real Init_redshift;
   Real End_redshift;
-  char scale_outputs_file[MAXLEN]; //File for the scale_factor output values for cosmological simulations
-#endif //COSMOLOGY
+  char scale_outputs_file[MAXLEN];  // File for the scale_factor output values
+                                    // for cosmological simulations
+#endif                              // COSMOLOGY
 #ifdef TILED_INITIAL_CONDITIONS
   Real tile_length;
-#endif //TILED_INITIAL_CONDITIONS
+#endif  // TILED_INITIAL_CONDITIONS
 
 #ifdef SET_MPI_GRID
   // Set the MPI Processes grid [n_proc_x, n_proc_y, n_proc_z]
@@ -304,11 +319,17 @@ struct parameters
   int n_proc_z;
 #endif
   int bc_potential_type;
-#if defined(COOLING_GRACKLE) || defined (CHEMISTRY_GPU)
-  char UVB_rates_file[MAXLEN]; //File for the UVB photoheating and photoionization rates of HI, HeI and HeII
-#endif  
+#if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU)
+  char UVB_rates_file[MAXLEN];  // File for the UVB photoheating and
+                                // photoionization rates of HI, HeI and HeII
+#endif
+  Real temperature_floor = 0;
+  Real density_floor     = 0;
+  Real scalar_floor      = 0;
 #ifdef ANALYSIS
-  char analysis_scale_outputs_file[MAXLEN]; //File for the scale_factor output values for cosmological simulations {{}}
+  char analysis_scale_outputs_file[MAXLEN];  // File for the scale_factor output
+                                             // values for cosmological
+                                             // simulations {{}}
   char analysisdir[MAXLEN];
   int lya_skewers_stride;
   Real lya_Pk_d_log_k;
@@ -316,15 +337,20 @@ struct parameters
   char skewersdir[MAXLEN];
   #endif
 #endif
+#ifdef SCALAR
+  #ifdef DUST
+  Real grain_radius;
+  #endif
+#endif
 };
 
-
-/*! \fn void parse_params(char *param_file, struct parameters * parms);
+/*! \fn void parse_params(char *param_file, struct Parameters * parms);
  *  \brief Reads the parameters in the given file into a structure. */
-extern void parse_params (char *param_file, struct parameters * parms, int argc, char** argv);
+extern void Parse_Params(char *param_file, struct Parameters *parms, int argc, char **argv);
 
 /*! \fn int is_param_valid(char *name);
- * \brief Verifies that a param is valid (even if not needed).  Avoids "warnings" in output. */
-extern int is_param_valid(const char *name);
+ * \brief Verifies that a param is valid (even if not needed).  Avoids
+ * "warnings" in output. */
+extern int Is_Param_Valid(const char *name);
 
-#endif //GLOBAL_H
+#endif  // GLOBAL_H
diff --git a/src/global/global_cuda.cu b/src/global/global_cuda.cu
index bd2e235c1..17c515416 100644
--- a/src/global/global_cuda.cu
+++ b/src/global/global_cuda.cu
@@ -1,24 +1,16 @@
 /*! \file global_cuda.cu
  *  \brief Declarations of the cuda global variables. */
 
-#ifdef CUDA
-
 #include "../global/global.h"
 
 // Declare global variables
 bool memory_allocated;
 Real *dev_conserved, *dev_conserved_half;
 Real *Q_Lx, *Q_Rx, *Q_Ly, *Q_Ry, *Q_Lz, *Q_Rz, *F_x, *F_y, *F_z;
+Real *ctElectricFields;
 Real *eta_x, *eta_y, *eta_z, *etah_x, *etah_y, *etah_z;
-Real *dev_dti;
 
-//Arrays for potential in GPU: Will be set to NULL if not using GRAVITY
+// Arrays for potential in GPU: Will be set to NULL if not using GRAVITY
 Real *dev_grav_potential;
 Real *temp_potential;
 Real *buffer_potential;
-
-// Arrays for calc_dt
-Real *host_dti_array;
-Real *dev_dti_array;
-
-#endif //CUDA
diff --git a/src/global/global_cuda.h b/src/global/global_cuda.h
index 35c0c355f..3f4d3148e 100644
--- a/src/global/global_cuda.h
+++ b/src/global/global_cuda.h
@@ -1,23 +1,22 @@
 /*! /file global_cuda.h
- *  /brief Declarations of global variables and functions for the cuda kernels. */
-
-#ifdef CUDA
-
-#include <stdlib.h>
-#include <stdio.h>
-#include "../utils/gpu.hpp"
-#include <math.h>
-#include "../global/global.h"
-
+ *  /brief Declarations of global variables and functions for the cuda kernels.
+ */
 
 #ifndef GLOBAL_CUDA_H
 #define GLOBAL_CUDA_H
 
-#define TPB 256 // threads per block
-//#define TPB 64
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../global/global.h"
+#include "../utils/gpu.hpp"
 
+#define TPB 256  // threads per block
+// #define TPB 64
 
-extern bool memory_allocated; // Flag becomes true after allocating the memory on the first timestep
+extern bool memory_allocated;  // Flag becomes true after allocating the memory
+                               // on the first timestep
 
 // Arrays are global so that they can be allocated only once.
 // Not all arrays will be allocated for every integrator
@@ -25,81 +24,46 @@ extern bool memory_allocated; // Flag becomes true after allocating the memory o
 // conserved variables
 extern Real *dev_conserved, *dev_conserved_half;
 // input states and associated interface fluxes (Q* and F* from Stone, 2008)
+// Note that for hydro the size of these arrays is n_fields*n_cells*sizeof(Real)
+// while for MHD it is (n_fields-1)*n_cells*sizeof(Real), i.e. they has one
+// fewer field than you would expect
 extern Real *Q_Lx, *Q_Rx, *Q_Ly, *Q_Ry, *Q_Lz, *Q_Rz, *F_x, *F_y, *F_z;
+// Constrained transport electric fields
+extern Real *ctElectricFields;
 
-// Scalar for storing device side hydro/MHD time steps
-extern Real *dev_dti;
-
-// array of inverse timesteps for dt calculation (brought back by Alwin May 24 2022)
-extern Real *host_dti_array;
-extern Real *dev_dti_array;
-
-//Arrays for potential in GPU: Will be set to NULL if not using GRAVITY
+// Arrays for potential in GPU: Will be set to NULL if not using GRAVITY
 extern Real *dev_grav_potential;
 extern Real *temp_potential;
 extern Real *buffer_potential;
 
-#define CudaSafeCall( err ) __cudaSafeCall( err, __FILE__, __LINE__ )
-#define CudaCheckError()    __cudaCheckError( __FILE__, __LINE__ )
-
-inline void __cudaSafeCall( cudaError err, const char *file, const int line )
-{
-#ifdef CUDA_ERROR_CHECK
-    if ( cudaSuccess != err )
-    {
-        fprintf( stderr, "cudaSafeCall() failed at %s:%i : %s\n",
-                 file, line, cudaGetErrorString( err ) );
-        exit( -1 );
-    }
-#endif
-
-    return;
-}
-
-inline void __cudaCheckError( const char *file, const int line )
-{
-#ifdef CUDA_ERROR_CHECK
-    cudaError err = cudaGetLastError();
-    if ( cudaSuccess != err )
-    {
-        fprintf( stderr, "cudaCheckError() failed at %s:%i : %s\n",
-                 file, line, cudaGetErrorString( err ) );
-        exit( -1 );
-    }
-
-    // More careful checking. However, this will affect performance.
-    // Comment away if needed.
-    err = cudaDeviceSynchronize();
-    if( cudaSuccess != err )
-    {
-        fprintf( stderr, "cudaCheckError() with sync failed at %s:%i : %s\n",
-                 file, line, cudaGetErrorString( err ) );
-        exit( -1 );
-    }
-#endif
-
-    return;
-}
-
-
-#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
-inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
-{
-   if (code != cudaSuccess)
-   {
-      fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
-      if (abort) exit(code);
-   }
-}
-
 /*! \fn int sgn_CUDA
  *  \brief Mathematical sign function. Returns sign of x. */
 __device__ inline int sgn_CUDA(Real x)
 {
-  if (x < 0) return -1;
-  else return 1;
+  if (x < 0) {
+    return -1;
+  } else {
+    return 1;
+  }
 }
 
-#endif //GLOBAL_CUDA_H
+// Define atomic_add if it's not supported
+#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
+#else
+__device__ double atomicAdd(double *address, double val)
+{
+  unsigned long long int *address_as_ull = (unsigned long long int *)address;
+  unsigned long long int old             = *address_as_ull, assumed;
+  do {
+    assumed = old;
+    old     = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed)));
+  } while (assumed != old);
+  return __longlong_as_double(old);
+}
+#endif
+
+// This helper function exists to make it easier to find printfs inside
+// kernels
+#define kernel_printf printf
 
-#endif //CUDA
+#endif  // GLOBAL_CUDA_H
diff --git a/src/gravity/grav3D.cpp b/src/gravity/grav3D.cpp
index f07ebade0..866663589 100644
--- a/src/gravity/grav3D.cpp
+++ b/src/gravity/grav3D.cpp
@@ -1,85 +1,84 @@
 #ifdef GRAVITY
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-#include "../global/global.h"
-#include "../io/io.h"
+  #include "../gravity/grav3D.h"
 
-#include "../gravity/grav3D.h"
-
-#ifdef PARALLEL_OMP
-#include "../utils/parallel_omp.h"
-#endif
+  #include <math.h>
+  #include <stdio.h>
+  #include <stdlib.h>
+  #include <string.h>
 
+  #include "../global/global.h"
+  #include "../io/io.h"
 
+  #ifdef PARALLEL_OMP
+    #include "../utils/parallel_omp.h"
+  #endif
 
-Grav3D::Grav3D( void ){}
+Grav3D::Grav3D(void) {}
 
-void Grav3D::Initialize( Real x_min, Real y_min, Real z_min, Real x_max, Real y_max, Real z_max, Real Lx, Real Ly, Real Lz, int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real, int n_ghost_pot_offset, struct parameters *P )
+void Grav3D::Initialize(Real x_min, Real y_min, Real z_min, Real x_max, Real y_max, Real z_max, Real Lx, Real Ly,
+                        Real Lz, int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, Real dx_real,
+                        Real dy_real, Real dz_real, int n_ghost_pot_offset, struct Parameters *P)
 {
-
-  //Set Box Size
+  // Set Box Size
   Lbox_x = Lx;
   Lbox_y = Ly;
   Lbox_z = Lz;
 
-  //Set Box Left boundary positions
+  // Set Box Left boundary positions
   xMin = x_min;
   yMin = y_min;
   zMin = z_min;
 
-  //Set Box Right boundary positions
+  // Set Box Right boundary positions
   xMax = x_max;
   yMax = y_max;
   zMax = z_max;
 
-
-
-  //Set uniform ( dx, dy, dz )
+  // Set uniform ( dx, dy, dz )
   dx = dx_real;
   dy = dy_real;
   dz = dz_real;
 
-  //Set Box Total number of cells
+  // Set Box Total number of cells
   nx_total = nx;
   ny_total = ny;
   nz_total = nz;
 
-  //Set Box local domain number of cells
+  // Set Box local domain number of cells
   nx_local = nx_real;
   ny_local = ny_real;
   nz_local = nz_real;
 
-  //Local n_cells without ghost cells
-  n_cells = nx_local*ny_local*nz_local;
-  //Local n_cells including ghost cells for the potential array
-  n_cells_potential = ( nx_local + 2*N_GHOST_POTENTIAL ) * ( ny_local + 2*N_GHOST_POTENTIAL ) * ( nz_local + 2*N_GHOST_POTENTIAL );
+  // Local n_cells without ghost cells
+  n_cells = nx_local * ny_local * nz_local;
+  // Local n_cells including ghost cells for the potential array
+  n_cells_potential =
+      (nx_local + 2 * N_GHOST_POTENTIAL) * (ny_local + 2 * N_GHOST_POTENTIAL) * (nz_local + 2 * N_GHOST_POTENTIAL);
 
-  //Set Initial and dt used for the extrapolation of the potential;
-  //The first timestep the potential in not extrapolated ( INITIAL = TRUE )
+  // Set Initial and dt used for the extrapolation of the potential;
+  // The first timestep the potential in not extrapolated ( INITIAL = TRUE )
   INITIAL = true;
   dt_prev = 0;
-  dt_now = 0;
+  dt_now  = 0;
 
   #ifdef COSMOLOGY
-  //Set the scale factor for cosmological simulations to 1,
-  //This will be changed to the proper value when cosmology is initialized
+  // Set the scale factor for cosmological simulations to 1,
+  // This will be changed to the proper value when cosmology is initialized
   current_a = 1;
   #endif
 
-  //Set the average density=0 ( Not Used )
+  // Set the average density=0 ( Not Used )
   dens_avrg = 0;
 
-  //Set the Gravitational Constant ( units must be consistent )
+  // Set the Gravitational Constant ( units must be consistent )
   Gconst = GN;
-  if (strcmp(P->init, "Spherical_Overdensity_3D")==0){
+  if (strcmp(P->init, "Spherical_Overdensity_3D") == 0) {
     Gconst = 1;
     chprintf(" WARNING: Using Gravitational Constant G=1.\n");
   }
 
-  //Flag to transfer the Potential boundaries
+  // Flag to transfer the Potential boundaries
   TRANSFER_POTENTIAL_BOUNDARIES = false;
 
   // Flag to set the gravity boundary flags
@@ -93,12 +92,14 @@ void Grav3D::Initialize( Real x_min, Real y_min, Real z_min, Real x_max, Real y_
 
   Initialize_values_CPU();
 
-  chprintf( "Gravity Initialized: \n Lbox: %0.2f %0.2f %0.2f \n Local: %d %d %d \n Global: %d %d %d \n",
-      Lbox_x, Lbox_y, Lbox_z, nx_local, ny_local, nz_local,   nx_total, ny_total, nz_total );
+  chprintf(
+      "Gravity Initialized: \n Lbox: %0.2f %0.2f %0.2f \n Local: %d %d %d \n "
+      "Global: %d %d %d \n",
+      Lbox_x, Lbox_y, Lbox_z, nx_local, ny_local, nz_local, nx_total, ny_total, nz_total);
 
-  chprintf( " dx:%f  dy:%f  dz:%f\n", dx, dy, dz );
-  chprintf( " N ghost potential: %d\n", N_GHOST_POTENTIAL);
-  chprintf( " N ghost offset: %d\n", n_ghost_pot_offset);
+  chprintf(" dx:%f  dy:%f  dz:%f\n", dx, dy, dz);
+  chprintf(" N ghost potential: %d\n", N_GHOST_POTENTIAL);
+  chprintf(" N ghost offset: %d\n", n_ghost_pot_offset);
 
   #ifdef PARALLEL_OMP
   chprintf(" Using OMP for gravity calculations\n");
@@ -107,47 +108,67 @@ void Grav3D::Initialize( Real x_min, Real y_min, Real z_min, Real x_max, Real y_
   chprintf("  N OMP Threads per MPI process: %d\n", N_OMP_THREADS);
   #endif
 
-  Poisson_solver.Initialize( Lbox_x, Lbox_y, Lbox_z, xMin, yMin, zMin, nx_total, ny_total, nz_total, nx_local, ny_local, nz_local, dx, dy, dz );
+  Poisson_solver.Initialize(Lbox_x, Lbox_y, Lbox_z, xMin, yMin, zMin, nx_total, ny_total, nz_total, nx_local, ny_local,
+                            nz_local, dx, dy, dz);
   #if defined(PARIS_TEST) || defined(PARIS_GALACTIC_TEST)
-  Poisson_solver_test.Initialize( Lbox_x, Lbox_y, Lbox_z, xMin, yMin, zMin, nx_total, ny_total, nz_total, nx_local, ny_local, nz_local, dx, dy, dz );
+  Poisson_solver_test.Initialize(Lbox_x, Lbox_y, Lbox_z, xMin, yMin, zMin, nx_total, ny_total, nz_total, nx_local,
+                                 ny_local, nz_local, dx, dy, dz);
   #endif
+
+  // At the end of initializing, set restart state if needed
+
+  if ((strcmp(P->init, "Read_Grid") == 0) && (P->nfile > 0)) {
+    Read_Restart_HDF5(P, P->nfile);
+  }
 }
 
 void Grav3D::AllocateMemory_CPU(void)
 {
   // allocate memory for the density and potential arrays
-  F.density_h    = (Real *) malloc(n_cells*sizeof(Real)); //array for the density
-  F.potential_h  = (Real *) malloc(n_cells_potential*sizeof(Real));   //array for the potential at the n-th timestep
-  F.potential_1_h  = (Real *) malloc(n_cells_potential*sizeof(Real)); //array for the potential at the (n-1)-th timestep
-  boundary_flags = (int *) malloc(6*sizeof(int)); // array for the gravity boundary flags
+  F.density_h = (Real *)malloc(n_cells * sizeof(Real));              // array for the
+                                                                     // density
+  F.potential_h = (Real *)malloc(n_cells_potential * sizeof(Real));  // array for the potential at the n-th timestep
+  F.potential_1_h =
+      (Real *)malloc(n_cells_potential * sizeof(Real));  // array for the potential at the (n-1)-th timestep
+  boundary_flags = (int *)malloc(6 * sizeof(int));       // array for the gravity boundary flags
 
   #ifdef GRAV_ISOLATED_BOUNDARY_X
-  F.pot_boundary_x0  = (Real *) malloc(N_GHOST_POTENTIAL*ny_local*nz_local*sizeof(Real)); //array for the potential isolated boundary
-  F.pot_boundary_x1  = (Real *) malloc(N_GHOST_POTENTIAL*ny_local*nz_local*sizeof(Real));
+  F.pot_boundary_x0 = (Real *)malloc(N_GHOST_POTENTIAL * ny_local * nz_local *
+                                     sizeof(Real));  // array for the potential isolated boundary
+  F.pot_boundary_x1 = (Real *)malloc(N_GHOST_POTENTIAL * ny_local * nz_local * sizeof(Real));
   #endif
   #ifdef GRAV_ISOLATED_BOUNDARY_Y
-  F.pot_boundary_y0  = (Real *) malloc(N_GHOST_POTENTIAL*nx_local*nz_local*sizeof(Real)); //array for the potential isolated boundary
-  F.pot_boundary_y1  = (Real *) malloc(N_GHOST_POTENTIAL*nx_local*nz_local*sizeof(Real));
+  F.pot_boundary_y0 = (Real *)malloc(N_GHOST_POTENTIAL * nx_local * nz_local *
+                                     sizeof(Real));  // array for the potential isolated boundary
+  F.pot_boundary_y1 = (Real *)malloc(N_GHOST_POTENTIAL * nx_local * nz_local * sizeof(Real));
   #endif
   #ifdef GRAV_ISOLATED_BOUNDARY_Z
-  F.pot_boundary_z0  = (Real *) malloc(N_GHOST_POTENTIAL*nx_local*ny_local*sizeof(Real)); //array for the potential isolated boundary
-  F.pot_boundary_z1  = (Real *) malloc(N_GHOST_POTENTIAL*nx_local*ny_local*sizeof(Real));
+  F.pot_boundary_z0 = (Real *)malloc(N_GHOST_POTENTIAL * nx_local * ny_local *
+                                     sizeof(Real));  // array for the potential isolated boundary
+  F.pot_boundary_z1 = (Real *)malloc(N_GHOST_POTENTIAL * nx_local * ny_local * sizeof(Real));
   #endif
-}
 
-void Grav3D::Set_Boundary_Flags( int *flags ){
-  for (int i=0; i<6; i++) boundary_flags[i] = flags[i];
+  #ifdef GRAVITY_ANALYTIC_COMP
+  F.analytic_potential_h = (Real *)malloc(n_cells_potential * sizeof(Real));
+  #endif
 }
 
-void Grav3D::Initialize_values_CPU(void){
+void Grav3D::Set_Boundary_Flags(int *flags)
+{
+  for (int i = 0; i < 6; i++) {
+    boundary_flags[i] = flags[i];
+  }
+}
 
-  //Set initial values to 0.
-  for (int id=0; id<n_cells; id++){
+void Grav3D::Initialize_values_CPU(void)
+{
+  // Set initial values to 0.
+  for (int id = 0; id < n_cells; id++) {
     F.density_h[id] = 0;
   }
 
-  for (int id_pot=0; id_pot<n_cells_potential; id_pot++){
-    F.potential_h[id_pot] = 0;
+  for (int id_pot = 0; id_pot < n_cells_potential; id_pot++) {
+    F.potential_h[id_pot]   = 0;
     F.potential_1_h[id_pot] = 0;
   }
 }
@@ -157,7 +178,7 @@ void Grav3D::FreeMemory_CPU(void)
   free(F.density_h);
   free(F.potential_h);
   free(F.potential_1_h);
-  free( boundary_flags );
+  free(boundary_flags);
 
   #ifdef GRAV_ISOLATED_BOUNDARY_X
   free(F.pot_boundary_x0);
@@ -176,6 +197,10 @@ void Grav3D::FreeMemory_CPU(void)
   #if defined(PARIS_TEST) || defined(PARIS_GALACTIC_TEST)
   Poisson_solver_test.Reset();
   #endif
+
+  #ifdef GRAVITY_ANALYTIC_COMP
+  free(F.analytic_potential_h);
+  #endif
 }
 
-#endif //GRAVITY
+#endif  // GRAVITY
diff --git a/src/gravity/grav3D.h b/src/gravity/grav3D.h
index 011ccb870..69cf8308a 100644
--- a/src/gravity/grav3D.h
+++ b/src/gravity/grav3D.h
@@ -2,29 +2,30 @@
 #define GRAV3D_H
 
 #include <stdio.h>
+
 #include "../global/global.h"
 
 #ifdef SOR
-#include "../gravity/potential_SOR_3D.h"
+  #include "../gravity/potential_SOR_3D.h"
 #endif
 
 #ifdef PARIS
-#include "../gravity/potential_paris_3D.h"
+  #include "../gravity/potential_paris_3D.h"
 #endif
 
 #ifdef PARIS_GALACTIC
-#include "../gravity/potential_paris_galactic.h"
+  #include "../gravity/potential_paris_galactic.h"
 #endif
 
 #ifdef HDF5
-#include <hdf5.h>
+  #include <hdf5.h>
 #endif
 
 #define GRAV_ISOLATED_BOUNDARY_X
 #define GRAV_ISOLATED_BOUNDARY_Y
 #define GRAV_ISOLATED_BOUNDARY_Z
 
-#define TPB_GRAV 1024
+#define TPB_GRAV  1024
 #define TPBX_GRAV 16
 #define TPBY_GRAV 8
 #define TPBZ_GRAV 8
@@ -33,8 +34,7 @@
  *  \brief Class to create a the gravity object. */
 class Grav3D
 {
-  public:
-
+ public:
   Real Lbox_x;
   Real Lbox_y;
   Real Lbox_z;
@@ -46,46 +46,44 @@ class Grav3D
   Real yMax;
   Real zMax;
   /*! \var nx
-  *  \brief Total number of cells in the x-dimension */
+   *  \brief Total number of cells in the x-dimension */
   int nx_total;
   /*! \var ny
-  *  \brief Total number of cells in the y-dimension */
+   *  \brief Total number of cells in the y-dimension */
   int ny_total;
   /*! \var nz
-  *  \brief Total number of cells in the z-dimension */
+   *  \brief Total number of cells in the z-dimension */
   int nz_total;
 
   /*! \var nx_local
-  *  \brief Local number of cells in the x-dimension */
+   *  \brief Local number of cells in the x-dimension */
   int nx_local;
   /*! \var ny_local
-  *  \brief Local number of cells in the y-dimension */
+   *  \brief Local number of cells in the y-dimension */
   int ny_local;
   /*! \var nz_local
-  *  \brief Local number of cells in the z-dimension */
+   *  \brief Local number of cells in the z-dimension */
   int nz_local;
 
   /*! \var dx
-  *  \brief x-width of cells */
+   *  \brief x-width of cells */
   Real dx;
   /*! \var dy
-  *  \brief y-width of cells */
+   *  \brief y-width of cells */
   Real dy;
   /*! \var dz
-  *  \brief z-width of cells */
+   *  \brief z-width of cells */
   Real dz;
 
-  #ifdef COSMOLOGY
+#ifdef COSMOLOGY
   Real current_a;
-  #endif
-
-  Real dens_avrg ;
+#endif
 
+  Real dens_avrg;
 
   int n_cells;
   int n_cells_potential;
 
-
   bool INITIAL;
 
   Real dt_prev;
@@ -95,117 +93,129 @@ class Grav3D
 
   bool TRANSFER_POTENTIAL_BOUNDARIES;
 
-
   bool BC_FLAGS_SET;
   int *boundary_flags;
 
-
-  #ifdef SOR
+#ifdef SOR
   Potential_SOR_3D Poisson_solver;
-  #endif
+#endif
 
-  #ifdef PARIS
-  Potential_Paris_3D Poisson_solver;
-  #endif
+#ifdef PARIS
+  PotentialParis3D Poisson_solver;
+#endif
 
-  #ifdef PARIS_GALACTIC
+#ifdef PARIS_GALACTIC
   #ifdef SOR
-  #define PARIS_GALACTIC_TEST
-  Potential_Paris_Galactic Poisson_solver_test;
+    #define PARIS_GALACTIC_TEST
+  PotentialParisGalactic Poisson_solver_test;
   #else
-  Potential_Paris_Galactic Poisson_solver;
-  #endif
+  PotentialParisGalactic Poisson_solver;
   #endif
+#endif
 
-  struct Fields
-  {
+  struct Fields {
     /*! \var density_h
      *  \brief Array containing the density of each cell in the grid */
     Real *density_h;
 
     /*! \var potential_h
-     *  \brief Array containing the gravitational potential of each cell in the grid */
+     *  \brief Array containing the gravitational potential of each cell in the
+     * grid */
     Real *potential_h;
 
     /*! \var potential_h
-     *  \brief Array containing the gravitational potential of each cell in the grid at the previous time step */
+     *  \brief Array containing the gravitational potential of each cell in the
+     * grid at the previous time step */
     Real *potential_1_h;
 
-    #ifdef GRAVITY_GPU
+#ifdef GRAVITY_ANALYTIC_COMP
+    Real *analytic_potential_h;
+#endif
+
+#ifdef GRAVITY_GPU
 
     /*! \var density_d
      *  \brief Device Array containing the density of each cell in the grid */
     Real *density_d;
 
     /*! \var potential_d
-    *  \brief Device Array containing the gravitational potential of each cell in the grid */
+     *  \brief Device Array containing the gravitational potential of each cell
+     * in the grid */
     Real *potential_d;
 
     /*! \var potential_d
-    *  \brief Device Array containing the gravitational potential of each cell in the grid at the previous time step */
+     *  \brief Device Array containing the gravitational potential of each cell
+     * in the grid at the previous time step */
     Real *potential_1_d;
 
-    #endif //GRAVITY_GPU
+  #ifdef GRAVITY_ANALYTIC_COMP
+    Real *analytic_potential_d;
+  #endif
+
+#endif  // GRAVITY_GPU
 
-    // Arrays for computing the potential values in isolated boundaries
-    #ifdef GRAV_ISOLATED_BOUNDARY_X
+// Arrays for computing the potential values in isolated boundaries
+#ifdef GRAV_ISOLATED_BOUNDARY_X
     Real *pot_boundary_x0;
     Real *pot_boundary_x1;
-    #endif
-    #ifdef GRAV_ISOLATED_BOUNDARY_Y
+#endif
+#ifdef GRAV_ISOLATED_BOUNDARY_Y
     Real *pot_boundary_y0;
     Real *pot_boundary_y1;
-    #endif
-    #ifdef GRAV_ISOLATED_BOUNDARY_Z
+#endif
+#ifdef GRAV_ISOLATED_BOUNDARY_Z
     Real *pot_boundary_z0;
     Real *pot_boundary_z1;
-    #endif
+#endif
 
-    #ifdef GRAVITY_GPU
-    #ifdef GRAV_ISOLATED_BOUNDARY_X
+#ifdef GRAVITY_GPU
+  #ifdef GRAV_ISOLATED_BOUNDARY_X
     Real *pot_boundary_x0_d;
     Real *pot_boundary_x1_d;
-    #endif
-    #ifdef GRAV_ISOLATED_BOUNDARY_Y
+  #endif
+  #ifdef GRAV_ISOLATED_BOUNDARY_Y
     Real *pot_boundary_y0_d;
     Real *pot_boundary_y1_d;
-    #endif
-    #ifdef GRAV_ISOLATED_BOUNDARY_Z
+  #endif
+  #ifdef GRAV_ISOLATED_BOUNDARY_Z
     Real *pot_boundary_z0_d;
     Real *pot_boundary_z1_d;
-    #endif
-    #endif//GRAVITY_GPU
+  #endif
+#endif  // GRAVITY_GPU
 
   } F;
 
   /*! \fn Grav3D(void)
-  *  \brief Constructor for the gravity class */
+   *  \brief Constructor for the gravity class */
   Grav3D(void);
 
   /*! \fn void Initialize(int nx_in, int ny_in, int nz_in)
-  *  \brief Initialize the grid. */
-  void Initialize( Real x_min, Real y_min, Real z_min, Real x_max, Real y_max, Real z_max, Real Lx, Real Ly, Real Lz, int nx_total, int ny_total, int nz_total, int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real, int n_ghost_pot_offset, struct parameters *P);
+   *  \brief Initialize the grid. */
+  void Initialize(Real x_min, Real y_min, Real z_min, Real x_max, Real y_max, Real z_max, Real Lx, Real Ly, Real Lz,
+                  int nx_total, int ny_total, int nz_total, int nx_real, int ny_real, int nz_real, Real dx_real,
+                  Real dy_real, Real dz_real, int n_ghost_pot_offset, struct Parameters *P);
 
   void AllocateMemory_CPU(void);
   void Initialize_values_CPU();
   void FreeMemory_CPU(void);
 
-  Real Get_Average_Density( );
-  Real Get_Average_Density_function( int g_start, int g_end );
+  void Read_Restart_HDF5(struct Parameters *P, int nfile);
+  void Write_Restart_HDF5(struct Parameters *P, int nfile);
 
-  void Set_Boundary_Flags( int *flags );
+  Real Get_Average_Density();
+  Real Get_Average_Density_function(int g_start, int g_end);
 
-  #ifdef SOR
-  void Copy_Isolated_Boundary_To_GPU_buffer( Real *isolated_boundary_h, Real *isolated_boundary_d, int boundary_size );
-  void Copy_Isolated_Boundaries_To_GPU( struct parameters *P );
-  #endif
+  void Set_Boundary_Flags(int *flags);
+
+#ifdef SOR
+  void Copy_Isolated_Boundary_To_GPU_buffer(Real *isolated_boundary_h, Real *isolated_boundary_d, int boundary_size);
+  void Copy_Isolated_Boundaries_To_GPU(struct Parameters *P);
+#endif
 
-  #ifdef GRAVITY_GPU
+#ifdef GRAVITY_GPU
   void AllocateMemory_GPU(void);
   void FreeMemory_GPU(void);
-  #endif
-
+#endif
 };
 
-
-#endif //GRAV3D_H
+#endif  // GRAV3D_H
diff --git a/src/gravity/gravity_boundaries.cpp b/src/gravity/gravity_boundaries.cpp
index 8c813af81..5e4b101eb 100644
--- a/src/gravity/gravity_boundaries.cpp
+++ b/src/gravity/gravity_boundaries.cpp
@@ -1,98 +1,130 @@
 #ifdef GRAVITY
 
+  #include <cmath>
 
-#include <cmath>
-#include "../io/io.h"
-#include "../grid/grid3D.h"
-#include "../gravity/grav3D.h"
-#include "../model/disk_galaxy.h"
+  #include "../gravity/grav3D.h"
+  #include "../grid/grid3D.h"
+  #include "../io/io.h"
+  #include "../model/disk_galaxy.h"
 
-#if defined (GRAV_ISOLATED_BOUNDARY_X) || defined (GRAV_ISOLATED_BOUNDARY_Y) || defined(GRAV_ISOLATED_BOUNDARY_Z)
-
-void Grid3D::Compute_Potential_Boundaries_Isolated( int dir, struct parameters *P ){
+  #if defined(GRAV_ISOLATED_BOUNDARY_X) || defined(GRAV_ISOLATED_BOUNDARY_Y) || defined(GRAV_ISOLATED_BOUNDARY_Z)
 
+void Grid3D::Compute_Potential_Boundaries_Isolated(int dir, struct Parameters *P)
+{
   // Set Isolated Boundaries for the ghost cells.
   int bc_potential_type = P->bc_potential_type;
-  //bc_potential_type = 0 -> Point mass potential GM/r
-  if ( dir == 0 ) Compute_Potential_Isolated_Boundary( 0, 0, bc_potential_type );
-  if ( dir == 1 ) Compute_Potential_Isolated_Boundary( 0, 1, bc_potential_type );
-  if ( dir == 2 ) Compute_Potential_Isolated_Boundary( 1, 0, bc_potential_type );
-  if ( dir == 3 ) Compute_Potential_Isolated_Boundary( 1, 1, bc_potential_type );
-  if ( dir == 4 ) Compute_Potential_Isolated_Boundary( 2, 0, bc_potential_type );
-  if ( dir == 5 ) Compute_Potential_Isolated_Boundary( 2, 1, bc_potential_type );
-
+  // bc_potential_type = 0 -> Point mass potential GM/r
+  if (dir == 0) {
+    Compute_Potential_Isolated_Boundary(0, 0, bc_potential_type);
+  }
+  if (dir == 1) {
+    Compute_Potential_Isolated_Boundary(0, 1, bc_potential_type);
+  }
+  if (dir == 2) {
+    Compute_Potential_Isolated_Boundary(1, 0, bc_potential_type);
+  }
+  if (dir == 3) {
+    Compute_Potential_Isolated_Boundary(1, 1, bc_potential_type);
+  }
+  if (dir == 4) {
+    Compute_Potential_Isolated_Boundary(2, 0, bc_potential_type);
+  }
+  if (dir == 5) {
+    Compute_Potential_Isolated_Boundary(2, 1, bc_potential_type);
+  }
 }
 
-void Grid3D::Set_Potential_Boundaries_Isolated( int direction, int side, int *flags ){
-
+void Grid3D::Set_Potential_Boundaries_Isolated(int direction, int side, int *flags)
+{
   Real *pot_boundary;
   int n_i, n_j, nGHST;
   int nx_g, ny_g, nz_g;
   int nx_local, ny_local, nz_local;
-  nGHST = N_GHOST_POTENTIAL;
-  nx_g = Grav.nx_local + 2*nGHST;
-  ny_g = Grav.ny_local + 2*nGHST;
-  nz_g = Grav.nz_local + 2*nGHST;
+  nGHST    = N_GHOST_POTENTIAL;
+  nx_g     = Grav.nx_local + 2 * nGHST;
+  ny_g     = Grav.ny_local + 2 * nGHST;
+  nz_g     = Grav.nz_local + 2 * nGHST;
   nx_local = Grav.nx_local;
   ny_local = Grav.ny_local;
   nz_local = Grav.nz_local;
 
-  #ifdef GRAV_ISOLATED_BOUNDARY_X
-  if ( direction == 0 ){
+    #ifdef GRAV_ISOLATED_BOUNDARY_X
+  if (direction == 0) {
     n_i = Grav.ny_local;
     n_j = Grav.nz_local;
-    if ( side == 0 ) pot_boundary = Grav.F.pot_boundary_x0;
-    if ( side == 1 ) pot_boundary = Grav.F.pot_boundary_x1;
+    if (side == 0) {
+      pot_boundary = Grav.F.pot_boundary_x0;
+    }
+    if (side == 1) {
+      pot_boundary = Grav.F.pot_boundary_x1;
+    }
   }
-  #endif
-  #ifdef GRAV_ISOLATED_BOUNDARY_Y
-  if ( direction == 1 ){
+    #endif
+    #ifdef GRAV_ISOLATED_BOUNDARY_Y
+  if (direction == 1) {
     n_i = Grav.nx_local;
     n_j = Grav.nz_local;
-    if ( side == 0 ) pot_boundary = Grav.F.pot_boundary_y0;
-    if ( side == 1 ) pot_boundary = Grav.F.pot_boundary_y1;
+    if (side == 0) {
+      pot_boundary = Grav.F.pot_boundary_y0;
+    }
+    if (side == 1) {
+      pot_boundary = Grav.F.pot_boundary_y1;
+    }
   }
-  #endif
-  #ifdef GRAV_ISOLATED_BOUNDARY_Z
-  if ( direction == 2 ){
+    #endif
+    #ifdef GRAV_ISOLATED_BOUNDARY_Z
+  if (direction == 2) {
     n_i = Grav.nx_local;
     n_j = Grav.ny_local;
-    if ( side == 0 ) pot_boundary = Grav.F.pot_boundary_z0;
-    if ( side == 1 ) pot_boundary = Grav.F.pot_boundary_z1;
+    if (side == 0) {
+      pot_boundary = Grav.F.pot_boundary_z0;
+    }
+    if (side == 1) {
+      pot_boundary = Grav.F.pot_boundary_z1;
+    }
   }
-  #endif
+    #endif
 
   int i, j, k, id_buffer, id_grid;
 
-  for ( k=0; k<nGHST; k++ ){
-    for ( i=0; i<n_i; i++ ){
-      for ( j=0; j<n_j; j++ ){
-
-        id_buffer = i + j*n_i + k*n_i*n_j;
+  for (k = 0; k < nGHST; k++) {
+    for (i = 0; i < n_i; i++) {
+      for (j = 0; j < n_j; j++) {
+        id_buffer = i + j * n_i + k * n_i * n_j;
 
-        if ( direction == 0 ){
-          if ( side == 0 ) id_grid = (k)                + (i+nGHST)*nx_g + (j+nGHST)*nx_g*ny_g;
-          if ( side == 1 ) id_grid = (k+nx_local+nGHST) + (i+nGHST)*nx_g + (j+nGHST)*nx_g*ny_g;
+        if (direction == 0) {
+          if (side == 0) {
+            id_grid = (k) + (i + nGHST) * nx_g + (j + nGHST) * nx_g * ny_g;
+          }
+          if (side == 1) {
+            id_grid = (k + nx_local + nGHST) + (i + nGHST) * nx_g + (j + nGHST) * nx_g * ny_g;
+          }
         }
-        if ( direction == 1 ){
-          if ( side == 0 ) id_grid = (i+nGHST) + (k)*nx_g                + (j+nGHST)*nx_g*ny_g;
-          if ( side == 1 ) id_grid = (i+nGHST) + (k+ny_local+nGHST)*nx_g + (j+nGHST)*nx_g*ny_g;
+        if (direction == 1) {
+          if (side == 0) {
+            id_grid = (i + nGHST) + (k)*nx_g + (j + nGHST) * nx_g * ny_g;
+          }
+          if (side == 1) {
+            id_grid = (i + nGHST) + (k + ny_local + nGHST) * nx_g + (j + nGHST) * nx_g * ny_g;
+          }
         }
-        if ( direction == 2 ){
-          if ( side == 0 ) id_grid = (i+nGHST) + (j+nGHST)*nx_g + (k)*nx_g*ny_g;
-          if ( side == 1 ) id_grid = (i+nGHST) + (j+nGHST)*nx_g + (k+nz_local+nGHST)*nx_g*ny_g;
+        if (direction == 2) {
+          if (side == 0) {
+            id_grid = (i + nGHST) + (j + nGHST) * nx_g + (k)*nx_g * ny_g;
+          }
+          if (side == 1) {
+            id_grid = (i + nGHST) + (j + nGHST) * nx_g + (k + nz_local + nGHST) * nx_g * ny_g;
+          }
         }
 
         Grav.F.potential_h[id_grid] = pot_boundary[id_buffer];
       }
     }
   }
-
 }
 
-
-void Grid3D::Compute_Potential_Isolated_Boundary( int direction, int side,  int bc_potential_type ){
-
+void Grid3D::Compute_Potential_Isolated_Boundary(int direction, int side, int bc_potential_type)
+{
   Real domain_l, Lx_local, Ly_local, Lz_local;
   Real *pot_boundary;
   int n_i, n_j, nGHST;
@@ -102,229 +134,252 @@ void Grid3D::Compute_Potential_Isolated_Boundary( int direction, int side,  int
   Ly_local = Grav.ny_local * Grav.dy;
   Lz_local = Grav.nz_local * Grav.dz;
 
-
-
-  #ifdef GRAV_ISOLATED_BOUNDARY_X
-  if ( direction == 0 ){
+    #ifdef GRAV_ISOLATED_BOUNDARY_X
+  if (direction == 0) {
     domain_l = Grav.xMin;
-    n_i = Grav.ny_local;
-    n_j = Grav.nz_local;
-    if ( side == 0 ) pot_boundary = Grav.F.pot_boundary_x0;
-    if ( side == 1 ) pot_boundary = Grav.F.pot_boundary_x1;
+    n_i      = Grav.ny_local;
+    n_j      = Grav.nz_local;
+    if (side == 0) {
+      pot_boundary = Grav.F.pot_boundary_x0;
+    }
+    if (side == 1) {
+      pot_boundary = Grav.F.pot_boundary_x1;
+    }
   }
-  #endif
-  #ifdef GRAV_ISOLATED_BOUNDARY_Y
-  if ( direction == 1 ){
+    #endif
+    #ifdef GRAV_ISOLATED_BOUNDARY_Y
+  if (direction == 1) {
     domain_l = Grav.yMin;
-    n_i = Grav.nx_local;
-    n_j = Grav.nz_local;
-    if ( side == 0 ) pot_boundary = Grav.F.pot_boundary_y0;
-    if ( side == 1 ) pot_boundary = Grav.F.pot_boundary_y1;
+    n_i      = Grav.nx_local;
+    n_j      = Grav.nz_local;
+    if (side == 0) {
+      pot_boundary = Grav.F.pot_boundary_y0;
+    }
+    if (side == 1) {
+      pot_boundary = Grav.F.pot_boundary_y1;
+    }
   }
-  #endif
-  #ifdef GRAV_ISOLATED_BOUNDARY_Z
-  if ( direction == 2 ){
+    #endif
+    #ifdef GRAV_ISOLATED_BOUNDARY_Z
+  if (direction == 2) {
     domain_l = Grav.zMin;
-    n_i = Grav.nx_local;
-    n_j = Grav.ny_local;
-    if ( side == 0 ) pot_boundary = Grav.F.pot_boundary_z0;
-    if ( side == 1 ) pot_boundary = Grav.F.pot_boundary_z1;
+    n_i      = Grav.nx_local;
+    n_j      = Grav.ny_local;
+    if (side == 0) {
+      pot_boundary = Grav.F.pot_boundary_z0;
+    }
+    if (side == 1) {
+      pot_boundary = Grav.F.pot_boundary_z1;
+    }
   }
-  #endif
+    #endif
 
   Real M, cm_pos_x, cm_pos_y, cm_pos_z, pos_x, pos_y, pos_z, r, delta_x, delta_y, delta_z;
 
-  if ( bc_potential_type == 0 ){
+  if (bc_potential_type == 0) {
     const Real r0 = H.sphere_radius;
-    M = (H.sphere_density-H.sphere_background_density)*4.0*M_PI*r0*r0*r0/3.0;
-    cm_pos_x = H.sphere_center_x;
-    cm_pos_y = H.sphere_center_y;
-    cm_pos_z = H.sphere_center_z;
+    M             = (H.sphere_density - H.sphere_background_density) * 4.0 * M_PI * r0 * r0 * r0 / 3.0;
+    cm_pos_x      = H.sphere_center_x;
+    cm_pos_y      = H.sphere_center_y;
+    cm_pos_z      = H.sphere_center_z;
   }
 
-  // for bc_pontential_type = 1 the mod_frac is
-  // the disk mass fraction being modelled.
-  Real mod_frac = 1;
-  Real pot_val;
+  // for bc_pontential_type = 1 the mod_frac is the fraction
+  // of the disk mass contributed by the simulated particles
+  Real mod_frac = SIMULATED_FRACTION;
+  Real pot_val  = 0.0;
   int i, j, k, id;
-  for ( k=0; k<nGHST; k++ ){
-    for ( i=0; i<n_i; i++ ){
-      for ( j=0; j<n_j; j++ ){
-
-        id = i + j*n_i + k*n_i*n_j;
+  for (k = 0; k < nGHST; k++) {
+    for (i = 0; i < n_i; i++) {
+      for (j = 0; j < n_j; j++) {
+        id = i + j * n_i + k * n_i * n_j;
 
-        if ( direction == 0 ){
+        if (direction == 0) {
           // pos_x = Grav.xMin - ( nGHST + k + 0.5 ) * Grav.dx;
-          pos_x = Grav.xMin + ( k + 0.5 - nGHST ) * Grav.dx;
-          if ( side == 1 ) pos_x += Lx_local + nGHST*Grav.dx;
-          pos_y = Grav.yMin + ( i + 0.5 )* Grav.dy;
-          pos_z = Grav.zMin + ( j + 0.5 )* Grav.dz;
+          pos_x = Grav.xMin + (k + 0.5 - nGHST) * Grav.dx;
+          if (side == 1) {
+            pos_x += Lx_local + nGHST * Grav.dx;
+          }
+          pos_y = Grav.yMin + (i + 0.5) * Grav.dy;
+          pos_z = Grav.zMin + (j + 0.5) * Grav.dz;
         }
 
-        if ( direction == 1 ){
+        if (direction == 1) {
           // pos_y = Grav.yMin - ( nGHST + k + 0.5 ) * Grav.dy;
-          pos_y = Grav.yMin + ( k + 0.5 - nGHST ) * Grav.dy;
-          if ( side == 1 ) pos_y += Ly_local + nGHST*Grav.dy;
-          pos_x = Grav.xMin + ( i + 0.5 )* Grav.dx;
-          pos_z = Grav.zMin + ( j + 0.5 )* Grav.dz;
+          pos_y = Grav.yMin + (k + 0.5 - nGHST) * Grav.dy;
+          if (side == 1) {
+            pos_y += Ly_local + nGHST * Grav.dy;
+          }
+          pos_x = Grav.xMin + (i + 0.5) * Grav.dx;
+          pos_z = Grav.zMin + (j + 0.5) * Grav.dz;
         }
 
-        if ( direction == 2 ){
+        if (direction == 2) {
           // pos_z = Grav.zMin - ( nGHST + k + 0.5 ) * Grav.dz;
-          pos_z = Grav.zMin + ( k + 0.5 - nGHST ) * Grav.dz;
-          if ( side == 1 ) pos_z += Lz_local + nGHST*Grav.dz;
-          pos_x = Grav.xMin + ( i + 0.5 )* Grav.dx;
-          pos_y = Grav.yMin + ( j + 0.5 )* Grav.dy;
+          pos_z = Grav.zMin + (k + 0.5 - nGHST) * Grav.dz;
+          if (side == 1) {
+            pos_z += Lz_local + nGHST * Grav.dz;
+          }
+          pos_x = Grav.xMin + (i + 0.5) * Grav.dx;
+          pos_y = Grav.yMin + (j + 0.5) * Grav.dy;
         }
 
-        if ( bc_potential_type == 0){
-          //Point mass potential GM/r
+        if (bc_potential_type == 0) {
+          // Point mass potential GM/r
           delta_x = pos_x - cm_pos_x;
           delta_y = pos_y - cm_pos_y;
           delta_z = pos_z - cm_pos_z;
-          r = sqrt( ( delta_x * delta_x ) + ( delta_y * delta_y ) + ( delta_z * delta_z ) );
-          pot_val = - Grav.Gconst * M / r;
-        }
-        else if (bc_potential_type == 1) {
+          r       = sqrt((delta_x * delta_x) + (delta_y * delta_y) + (delta_z * delta_z));
+          pot_val = -Grav.Gconst * M / r;
+        } else if (bc_potential_type == 1) {
           // M-W disk potential
-          r = sqrt(pos_x*pos_x + pos_y*pos_y);
-          pot_val = mod_frac * Galaxies::MW.phi_disk_D3D(r, pos_z);
-        }
-        else{
-          chprintf("ERROR: Boundary Potential not set, need to set appropriate bc_potential_type \n");
+          r       = sqrt(pos_x * pos_x + pos_y * pos_y);
+          pot_val = mod_frac * galaxies::MW.phi_disk_D3D(r, pos_z);
+        } else {
+          chprintf(
+              "ERROR: Boundary Potential not set, need to set appropriate "
+              "bc_potential_type \n");
         }
 
         pot_boundary[id] = pot_val;
-
       }
     }
   }
-
 }
 
+  #endif  // GRAV_ISOLATED_BOUNDARY_X
 
-#endif //GRAV_ISOLATED_BOUNDARY_X
-
-void Grid3D::Set_Potential_Boundaries_Periodic( int direction, int side, int *flags ){
+void Grid3D::Set_Potential_Boundaries_Periodic(int direction, int side, int *flags)
+{
   // Flags: 1 (periodic), 2 (reflective), 3 (transmissive), 4 (custom), 5 (mpi)
 
   int i, j, k, indx_src, indx_dst;
   int nGHST, nx_g, ny_g, nz_g;
   nGHST = N_GHOST_POTENTIAL;
-  nx_g = Grav.nx_local + 2*nGHST;
-  ny_g = Grav.ny_local + 2*nGHST;
-  nz_g = Grav.nz_local + 2*nGHST;
-
-  //Copy X boundaries
-  if (direction == 0){
-    for ( k=0; k<nz_g; k++ ){
-      for ( j=0; j<ny_g; j++ ){
-        for ( i=0; i<nGHST; i++ ){
-          if ( side == 0 ){
-            indx_src = (nx_g - 2*nGHST + i) + (j)*nx_g + (k)*nx_g*ny_g; //Periodic
-            indx_dst = (i) + (j)*nx_g + (k)*nx_g*ny_g;
+  nx_g  = Grav.nx_local + 2 * nGHST;
+  ny_g  = Grav.ny_local + 2 * nGHST;
+  nz_g  = Grav.nz_local + 2 * nGHST;
+
+  // Copy X boundaries
+  if (direction == 0) {
+    for (k = 0; k < nz_g; k++) {
+      for (j = 0; j < ny_g; j++) {
+        for (i = 0; i < nGHST; i++) {
+          if (side == 0) {
+            indx_src = (nx_g - 2 * nGHST + i) + (j)*nx_g + (k)*nx_g * ny_g;  // Periodic
+            indx_dst = (i) + (j)*nx_g + (k)*nx_g * ny_g;
           }
-          if ( side == 1 ){
-            indx_src = (i+nGHST) + (j)*nx_g + (k)*nx_g*ny_g;   //Periodic
-            indx_dst = (nx_g - nGHST + i) + (j)*nx_g + (k)*nx_g*ny_g;
+          if (side == 1) {
+            indx_src = (i + nGHST) + (j)*nx_g + (k)*nx_g * ny_g;  // Periodic
+            indx_dst = (nx_g - nGHST + i) + (j)*nx_g + (k)*nx_g * ny_g;
           }
-          Grav.F.potential_h[indx_dst] = Grav.F.potential_h[indx_src] ;
+          Grav.F.potential_h[indx_dst] = Grav.F.potential_h[indx_src];
         }
       }
     }
   }
 
-  //Copy Y boundaries
-  if (direction == 1){
-    for ( k=0; k<nz_g; k++ ){
-      for ( j=0; j<nGHST; j++ ){
-        for ( i=0; i<nx_g; i++ ){
-          if ( side == 0 ){
-            indx_src = (i) + (ny_g - 2*nGHST + j)*nx_g + (k)*nx_g*ny_g; //Periodic
-            indx_dst = (i) + (j)*nx_g + (k)*nx_g*ny_g;
+  // Copy Y boundaries
+  if (direction == 1) {
+    for (k = 0; k < nz_g; k++) {
+      for (j = 0; j < nGHST; j++) {
+        for (i = 0; i < nx_g; i++) {
+          if (side == 0) {
+            indx_src = (i) + (ny_g - 2 * nGHST + j) * nx_g + (k)*nx_g * ny_g;  // Periodic
+            indx_dst = (i) + (j)*nx_g + (k)*nx_g * ny_g;
           }
-          if ( side == 1 ){
-            indx_src = (i) + (j+nGHST)*nx_g + (k)*nx_g*ny_g; //Periodic
-            indx_dst = (i) + (ny_g - nGHST + j)*nx_g + (k)*nx_g*ny_g;
+          if (side == 1) {
+            indx_src = (i) + (j + nGHST) * nx_g + (k)*nx_g * ny_g;  // Periodic
+            indx_dst = (i) + (ny_g - nGHST + j) * nx_g + (k)*nx_g * ny_g;
           }
-          Grav.F.potential_h[indx_dst] = Grav.F.potential_h[indx_src] ;
+          Grav.F.potential_h[indx_dst] = Grav.F.potential_h[indx_src];
         }
       }
     }
   }
 
-  //Copy Z boundaries
-  if (direction == 2){
-    for ( k=0; k<nGHST; k++ ){
-      for ( j=0; j<ny_g; j++ ){
-        for ( i=0; i<nx_g; i++ ){
-          if ( side == 0 ){
-            indx_src = (i) + (j)*nx_g + (nz_g - 2*nGHST + k)*nx_g*ny_g;  //Periodic
-            indx_dst = (i) + (j)*nx_g + (k)*nx_g*ny_g;
+  // Copy Z boundaries
+  if (direction == 2) {
+    for (k = 0; k < nGHST; k++) {
+      for (j = 0; j < ny_g; j++) {
+        for (i = 0; i < nx_g; i++) {
+          if (side == 0) {
+            indx_src = (i) + (j)*nx_g + (nz_g - 2 * nGHST + k) * nx_g * ny_g;  // Periodic
+            indx_dst = (i) + (j)*nx_g + (k)*nx_g * ny_g;
           }
-          if ( side == 1 ){
-            indx_src = (i) + (j)*nx_g + (k+nGHST)*nx_g*ny_g; //Periodic
-            indx_dst = (i) + (j)*nx_g + (nz_g - nGHST + k)*nx_g*ny_g;
+          if (side == 1) {
+            indx_src = (i) + (j)*nx_g + (k + nGHST) * nx_g * ny_g;  // Periodic
+            indx_dst = (i) + (j)*nx_g + (nz_g - nGHST + k) * nx_g * ny_g;
           }
-        Grav.F.potential_h[indx_dst] = Grav.F.potential_h[indx_src] ;
+          Grav.F.potential_h[indx_dst] = Grav.F.potential_h[indx_src];
         }
       }
     }
   }
-
 }
 
-#ifdef MPI_CHOLLA
-int Grid3D::Load_Gravity_Potential_To_Buffer( int direction, int side, Real *buffer, int buffer_start  ){
-
-
+  #ifdef MPI_CHOLLA
+int Grid3D::Load_Gravity_Potential_To_Buffer(int direction, int side, Real *buffer, int buffer_start)
+{
   int i, j, k, indx, indx_buff, length;
   int nGHST, nx_g, ny_g, nz_g;
   nGHST = N_GHOST_POTENTIAL;
-  nx_g = Grav.nx_local + 2*nGHST;
-  ny_g = Grav.ny_local + 2*nGHST;
-  nz_g = Grav.nz_local + 2*nGHST;
+  nx_g  = Grav.nx_local + 2 * nGHST;
+  ny_g  = Grav.ny_local + 2 * nGHST;
+  nz_g  = Grav.nz_local + 2 * nGHST;
 
-  //Load X boundaries
-  if (direction == 0){
+  // Load X boundaries
+  if (direction == 0) {
     length = nGHST * nz_g * ny_g;
-    for ( k=0; k<nz_g; k++ ){
-      for ( j=0; j<ny_g; j++ ){
-        for ( i=0; i<nGHST; i++ ){
-          if ( side == 0 ) indx = (i+nGHST) + (j)*nx_g + (k)*nx_g*ny_g;
-          if ( side == 1 ) indx = (nx_g - 2*nGHST + i) + (j)*nx_g + (k)*nx_g*ny_g;
-          indx_buff = (j) + (k)*ny_g + i*ny_g*nz_g ;
-          buffer[buffer_start+indx_buff] = Grav.F.potential_h[indx];
+    for (k = 0; k < nz_g; k++) {
+      for (j = 0; j < ny_g; j++) {
+        for (i = 0; i < nGHST; i++) {
+          if (side == 0) {
+            indx = (i + nGHST) + (j)*nx_g + (k)*nx_g * ny_g;
+          }
+          if (side == 1) {
+            indx = (nx_g - 2 * nGHST + i) + (j)*nx_g + (k)*nx_g * ny_g;
+          }
+          indx_buff                        = (j) + (k)*ny_g + i * ny_g * nz_g;
+          buffer[buffer_start + indx_buff] = Grav.F.potential_h[indx];
         }
       }
     }
   }
 
-  //Load Y boundaries
-  if (direction == 1){
+  // Load Y boundaries
+  if (direction == 1) {
     length = nGHST * nz_g * nx_g;
-    for ( k=0; k<nz_g; k++ ){
-      for ( j=0; j<nGHST; j++ ){
-        for ( i=0; i<nx_g; i++ ){
-          if ( side == 0 ) indx = (i) + (j+nGHST)*nx_g + (k)*nx_g*ny_g;
-          if ( side == 1 ) indx = (i) + (ny_g - 2*nGHST + j)*nx_g + (k)*nx_g*ny_g;
-          indx_buff = (i) + (k)*nx_g + j*nx_g*nz_g ;
-          buffer[buffer_start+indx_buff] = Grav.F.potential_h[indx];
+    for (k = 0; k < nz_g; k++) {
+      for (j = 0; j < nGHST; j++) {
+        for (i = 0; i < nx_g; i++) {
+          if (side == 0) {
+            indx = (i) + (j + nGHST) * nx_g + (k)*nx_g * ny_g;
+          }
+          if (side == 1) {
+            indx = (i) + (ny_g - 2 * nGHST + j) * nx_g + (k)*nx_g * ny_g;
+          }
+          indx_buff                        = (i) + (k)*nx_g + j * nx_g * nz_g;
+          buffer[buffer_start + indx_buff] = Grav.F.potential_h[indx];
         }
       }
     }
   }
 
-  //Load Z boundaries
-  if (direction == 2){
+  // Load Z boundaries
+  if (direction == 2) {
     length = nGHST * nx_g * ny_g;
-    for ( k=0; k<nGHST; k++ ){
-      for ( j=0; j<ny_g; j++ ){
-        for ( i=0; i<nx_g; i++ ){
-          if ( side == 0 ) indx = (i) + (j)*nx_g + (k+nGHST)*nx_g*ny_g;
-          if ( side == 1 ) indx = (i) + (j)*nx_g + (nz_g - 2*nGHST + k)*nx_g*ny_g;
-          indx_buff = (i) + (j)*nx_g + k*nx_g*ny_g ;
-          buffer[buffer_start+indx_buff] = Grav.F.potential_h[indx];
+    for (k = 0; k < nGHST; k++) {
+      for (j = 0; j < ny_g; j++) {
+        for (i = 0; i < nx_g; i++) {
+          if (side == 0) {
+            indx = (i) + (j)*nx_g + (k + nGHST) * nx_g * ny_g;
+          }
+          if (side == 1) {
+            indx = (i) + (j)*nx_g + (nz_g - 2 * nGHST + k) * nx_g * ny_g;
+          }
+          indx_buff                        = (i) + (j)*nx_g + k * nx_g * ny_g;
+          buffer[buffer_start + indx_buff] = Grav.F.potential_h[indx];
         }
       }
     }
@@ -332,59 +387,69 @@ int Grid3D::Load_Gravity_Potential_To_Buffer( int direction, int side, Real *buf
   return length;
 }
 
-
-void Grid3D::Unload_Gravity_Potential_from_Buffer( int direction, int side, Real *buffer, int buffer_start  ){
-
-
+void Grid3D::Unload_Gravity_Potential_from_Buffer(int direction, int side, Real *buffer, int buffer_start)
+{
   int i, j, k, indx, indx_buff;
   int nGHST, nx_g, ny_g, nz_g;
   nGHST = N_GHOST_POTENTIAL;
-  nx_g = Grav.nx_local + 2*nGHST;
-  ny_g = Grav.ny_local + 2*nGHST;
-  nz_g = Grav.nz_local + 2*nGHST;
-
-  //Load X boundaries
-  if (direction == 0){
-    for ( k=0; k<nz_g; k++ ){
-      for ( j=0; j<ny_g; j++ ){
-        for ( i=0; i<nGHST; i++ ){
-          if ( side == 0 ) indx = (i) + (j)*nx_g + (k)*nx_g*ny_g;
-          if ( side == 1 ) indx = (nx_g - nGHST + i) + (j)*nx_g + (k)*nx_g*ny_g;
-          indx_buff = (j) + (k)*ny_g + i*ny_g*nz_g ;
-          Grav.F.potential_h[indx] = buffer[buffer_start+indx_buff];
+  nx_g  = Grav.nx_local + 2 * nGHST;
+  ny_g  = Grav.ny_local + 2 * nGHST;
+  nz_g  = Grav.nz_local + 2 * nGHST;
+
+  // Load X boundaries
+  if (direction == 0) {
+    for (k = 0; k < nz_g; k++) {
+      for (j = 0; j < ny_g; j++) {
+        for (i = 0; i < nGHST; i++) {
+          if (side == 0) {
+            indx = (i) + (j)*nx_g + (k)*nx_g * ny_g;
+          }
+          if (side == 1) {
+            indx = (nx_g - nGHST + i) + (j)*nx_g + (k)*nx_g * ny_g;
+          }
+          indx_buff                = (j) + (k)*ny_g + i * ny_g * nz_g;
+          Grav.F.potential_h[indx] = buffer[buffer_start + indx_buff];
         }
       }
     }
   }
 
-  //Load Y boundaries
-  if (direction == 1){
-    for ( k=0; k<nz_g; k++ ){
-      for ( j=0; j<nGHST; j++ ){
-        for ( i=0; i<nx_g; i++ ){
-          if ( side == 0 ) indx = (i) + (j)*nx_g + (k)*nx_g*ny_g;
-          if ( side == 1 ) indx = (i) + (ny_g - nGHST + j)*nx_g + (k)*nx_g*ny_g;
-          indx_buff = (i) + (k)*nx_g + j*nx_g*nz_g ;
-          Grav.F.potential_h[indx] = buffer[buffer_start+indx_buff];
+  // Load Y boundaries
+  if (direction == 1) {
+    for (k = 0; k < nz_g; k++) {
+      for (j = 0; j < nGHST; j++) {
+        for (i = 0; i < nx_g; i++) {
+          if (side == 0) {
+            indx = (i) + (j)*nx_g + (k)*nx_g * ny_g;
+          }
+          if (side == 1) {
+            indx = (i) + (ny_g - nGHST + j) * nx_g + (k)*nx_g * ny_g;
+          }
+          indx_buff                = (i) + (k)*nx_g + j * nx_g * nz_g;
+          Grav.F.potential_h[indx] = buffer[buffer_start + indx_buff];
         }
       }
     }
   }
 
-  //Load Z boundaries
-  if (direction == 2){
-    for ( k=0; k<nGHST; k++ ){
-      for ( j=0; j<ny_g; j++ ){
-        for ( i=0; i<nx_g; i++ ){
-          if ( side == 0 ) indx = (i) + (j)*nx_g + (k)*nx_g*ny_g;
-          if ( side == 1 ) indx = (i) + (j)*nx_g + (nz_g - nGHST + k)*nx_g*ny_g;
-          indx_buff = (i) + (j)*nx_g + k*nx_g*ny_g ;
-          Grav.F.potential_h[indx] = buffer[buffer_start+indx_buff];
+  // Load Z boundaries
+  if (direction == 2) {
+    for (k = 0; k < nGHST; k++) {
+      for (j = 0; j < ny_g; j++) {
+        for (i = 0; i < nx_g; i++) {
+          if (side == 0) {
+            indx = (i) + (j)*nx_g + (k)*nx_g * ny_g;
+          }
+          if (side == 1) {
+            indx = (i) + (j)*nx_g + (nz_g - nGHST + k) * nx_g * ny_g;
+          }
+          indx_buff                = (i) + (j)*nx_g + k * nx_g * ny_g;
+          Grav.F.potential_h[indx] = buffer[buffer_start + indx_buff];
         }
       }
     }
   }
 }
 
-#endif //GRAVITY
-#endif //MPI_CHOLLA
+  #endif  // GRAVITY
+#endif    // MPI_CHOLLA
diff --git a/src/gravity/gravity_boundaries_gpu.cu b/src/gravity/gravity_boundaries_gpu.cu
index 4e79cfa4d..63f8d6e86 100644
--- a/src/gravity/gravity_boundaries_gpu.cu
+++ b/src/gravity/gravity_boundaries_gpu.cu
@@ -1,163 +1,221 @@
 #if defined(GRAVITY) && defined(GRAVITY_GPU)
 
+  #include <cmath>
 
-#include <cmath>
-#include "../io/io.h"
-#include "../grid/grid3D.h"
-#include "../gravity/grav3D.h"
+  #include "../gravity/grav3D.h"
+  #include "../grid/grid3D.h"
+  #include "../io/io.h"
 
+  #if defined(GRAV_ISOLATED_BOUNDARY_X) || defined(GRAV_ISOLATED_BOUNDARY_Y) || defined(GRAV_ISOLATED_BOUNDARY_Z)
 
-#if defined (GRAV_ISOLATED_BOUNDARY_X) || defined (GRAV_ISOLATED_BOUNDARY_Y) || defined(GRAV_ISOLATED_BOUNDARY_Z)
-
-void __global__ Set_Potential_Boundaries_Isolated_kernel(int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int n_ghost, Real *potential_d, Real *pot_boundary_d   ){
-
+void __global__ Set_Potential_Boundaries_Isolated_kernel(int direction, int side, int size_buffer, int n_i, int n_j,
+                                                         int nx, int ny, int nz, int n_ghost, Real *potential_d,
+                                                         Real *pot_boundary_d)
+{
   // get a global thread ID
   int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot;
-  tid = threadIdx.x + blockIdx.x * blockDim.x;
-  tid_k = tid / (n_i*n_j);
-  tid_j = (tid - tid_k*n_i*n_j) / n_i;
-  tid_i = tid - tid_k*n_i*n_j - tid_j*n_i;
+  tid   = threadIdx.x + blockIdx.x * blockDim.x;
+  tid_k = tid / (n_i * n_j);
+  tid_j = (tid - tid_k * n_i * n_j) / n_i;
+  tid_i = tid - tid_k * n_i * n_j - tid_j * n_i;
 
-  if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost ) return;
+  if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) {
+    return;
+  }
 
-  tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j;
+  tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j;
 
-  if ( direction == 0 ){
-    if ( side == 0 ) tid_pot = ( tid_k )                + (tid_i+n_ghost)*nx + (tid_j+n_ghost)*nx*ny;
-    if ( side == 1 ) tid_pot = ( nx - n_ghost + tid_k ) + (tid_i+n_ghost)*nx + (tid_j+n_ghost)*nx*ny;
+  if (direction == 0) {
+    if (side == 0) {
+      tid_pot = (tid_k) + (tid_i + n_ghost) * nx + (tid_j + n_ghost) * nx * ny;
+    }
+    if (side == 1) {
+      tid_pot = (nx - n_ghost + tid_k) + (tid_i + n_ghost) * nx + (tid_j + n_ghost) * nx * ny;
+    }
   }
-  if ( direction == 1 ){
-    if ( side == 0 ) tid_pot = (tid_i+n_ghost) + ( tid_k )*nx                 + (tid_j+n_ghost)*nx*ny;
-    if ( side == 1 ) tid_pot = (tid_i+n_ghost) + ( ny - n_ghost + tid_k  )*nx + (tid_j+n_ghost)*nx*ny;
+  if (direction == 1) {
+    if (side == 0) {
+      tid_pot = (tid_i + n_ghost) + (tid_k)*nx + (tid_j + n_ghost) * nx * ny;
+    }
+    if (side == 1) {
+      tid_pot = (tid_i + n_ghost) + (ny - n_ghost + tid_k) * nx + (tid_j + n_ghost) * nx * ny;
+    }
   }
-  if ( direction == 2 ){
-    if ( side == 0 ) tid_pot = (tid_i+n_ghost) + (tid_j+n_ghost)*nx + ( tid_k )*nx*ny;
-    if ( side == 1 ) tid_pot = (tid_i+n_ghost) + (tid_j+n_ghost)*nx + ( nz - n_ghost + tid_k  )*nx*ny;
+  if (direction == 2) {
+    if (side == 0) {
+      tid_pot = (tid_i + n_ghost) + (tid_j + n_ghost) * nx + (tid_k)*nx * ny;
+    }
+    if (side == 1) {
+      tid_pot = (tid_i + n_ghost) + (tid_j + n_ghost) * nx + (nz - n_ghost + tid_k) * nx * ny;
+    }
   }
 
   potential_d[tid_pot] = pot_boundary_d[tid_buffer];
 }
 
-void Grid3D::Set_Potential_Boundaries_Isolated_GPU( int direction, int side, int *flags ){
-
+void Grid3D::Set_Potential_Boundaries_Isolated_GPU(int direction, int side, int *flags)
+{
   int n_i, n_j, n_ghost, size_buffer;
   int nx_g, ny_g, nz_g;
   n_ghost = N_GHOST_POTENTIAL;
-  nx_g = Grav.nx_local + 2*n_ghost;
-  ny_g = Grav.ny_local + 2*n_ghost;
-  nz_g = Grav.nz_local + 2*n_ghost;
-
+  nx_g    = Grav.nx_local + 2 * n_ghost;
+  ny_g    = Grav.ny_local + 2 * n_ghost;
+  nz_g    = Grav.nz_local + 2 * n_ghost;
 
   Real *pot_boundary_h, *pot_boundary_d;
-  #ifdef GRAV_ISOLATED_BOUNDARY_X
-  if ( direction == 0 ){
+    #ifdef GRAV_ISOLATED_BOUNDARY_X
+  if (direction == 0) {
     n_i = Grav.ny_local;
     n_j = Grav.nz_local;
-    if ( side == 0 ) pot_boundary_h = Grav.F.pot_boundary_x0;
-    if ( side == 1 ) pot_boundary_h = Grav.F.pot_boundary_x1;
-    if ( side == 0 ) pot_boundary_d = Grav.F.pot_boundary_x0_d;
-    if ( side == 1 ) pot_boundary_d = Grav.F.pot_boundary_x1_d;
+    if (side == 0) {
+      pot_boundary_h = Grav.F.pot_boundary_x0;
+    }
+    if (side == 1) {
+      pot_boundary_h = Grav.F.pot_boundary_x1;
+    }
+    if (side == 0) {
+      pot_boundary_d = Grav.F.pot_boundary_x0_d;
+    }
+    if (side == 1) {
+      pot_boundary_d = Grav.F.pot_boundary_x1_d;
+    }
   }
-  #endif
-  #ifdef GRAV_ISOLATED_BOUNDARY_Y
-  if ( direction == 1 ){
+    #endif
+    #ifdef GRAV_ISOLATED_BOUNDARY_Y
+  if (direction == 1) {
     n_i = Grav.nx_local;
     n_j = Grav.nz_local;
-    if ( side == 0 ) pot_boundary_h = Grav.F.pot_boundary_y0;
-    if ( side == 1 ) pot_boundary_h = Grav.F.pot_boundary_y1;
-    if ( side == 0 ) pot_boundary_d = Grav.F.pot_boundary_y0_d;
-    if ( side == 1 ) pot_boundary_d = Grav.F.pot_boundary_y1_d;
+    if (side == 0) {
+      pot_boundary_h = Grav.F.pot_boundary_y0;
+    }
+    if (side == 1) {
+      pot_boundary_h = Grav.F.pot_boundary_y1;
+    }
+    if (side == 0) {
+      pot_boundary_d = Grav.F.pot_boundary_y0_d;
+    }
+    if (side == 1) {
+      pot_boundary_d = Grav.F.pot_boundary_y1_d;
+    }
   }
-  #endif
-  #ifdef GRAV_ISOLATED_BOUNDARY_Z
-  if ( direction == 2 ){
+    #endif
+    #ifdef GRAV_ISOLATED_BOUNDARY_Z
+  if (direction == 2) {
     n_i = Grav.nx_local;
     n_j = Grav.ny_local;
-    if ( side == 0 ) pot_boundary_h = Grav.F.pot_boundary_z0;
-    if ( side == 1 ) pot_boundary_h = Grav.F.pot_boundary_z1;
-    if ( side == 0 ) pot_boundary_d = Grav.F.pot_boundary_z0_d;
-    if ( side == 1 ) pot_boundary_d = Grav.F.pot_boundary_z1_d;
+    if (side == 0) {
+      pot_boundary_h = Grav.F.pot_boundary_z0;
+    }
+    if (side == 1) {
+      pot_boundary_h = Grav.F.pot_boundary_z1;
+    }
+    if (side == 0) {
+      pot_boundary_d = Grav.F.pot_boundary_z0_d;
+    }
+    if (side == 1) {
+      pot_boundary_d = Grav.F.pot_boundary_z1_d;
+    }
   }
-  #endif
+    #endif
 
   size_buffer = N_GHOST_POTENTIAL * n_i * n_j;
 
   // set values for GPU kernels
-  int ngrid = ( size_buffer - 1 ) / TPB_GRAV + 1;
+  int ngrid = (size_buffer - 1) / TPB_GRAV + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(ngrid, 1, 1);
   //  number of threads per 1D block
   dim3 dim1dBlock(TPB_GRAV, 1, 1);
 
-  //Copy the boundary array from host to device
-  cudaMemcpy( pot_boundary_d, pot_boundary_h, size_buffer*sizeof(Real), cudaMemcpyHostToDevice );
+  // Copy the boundary array from host to device
+  cudaMemcpy(pot_boundary_d, pot_boundary_h, size_buffer * sizeof(Real), cudaMemcpyHostToDevice);
   cudaDeviceSynchronize();
 
   // Copy the potential boundary from buffer to potential array
-  hipLaunchKernelGGL( Set_Potential_Boundaries_Isolated_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, nx_g, ny_g, nz_g, n_ghost, Grav.F.potential_d, pot_boundary_d );
-
+  hipLaunchKernelGGL(Set_Potential_Boundaries_Isolated_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side,
+                     size_buffer, n_i, n_j, nx_g, ny_g, nz_g, n_ghost, Grav.F.potential_d, pot_boundary_d);
 }
 
+  #endif  // GRAV_ISOLATED_BOUNDARY
 
-
-
-#endif //GRAV_ISOLATED_BOUNDARY
-
-
-void __global__ Set_Potential_Boundaries_Periodic_kernel(int direction, int side, int n_i, int n_j, int nx, int ny, int nz, int n_ghost, Real *potential_d ){
-  
+void __global__ Set_Potential_Boundaries_Periodic_kernel(int direction, int side, int n_i, int n_j, int nx, int ny,
+                                                         int nz, int n_ghost, Real *potential_d)
+{
   // get a global thread ID
   int tid, tid_i, tid_j, tid_k, tid_src, tid_dst;
-  tid = threadIdx.x + blockIdx.x * blockDim.x;
-  tid_k = tid / (n_i*n_j);
-  tid_j = (tid - tid_k*n_i*n_j) / n_i;
-  tid_i = tid - tid_k*n_i*n_j - tid_j*n_i;
-  
-  if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost ) return;
-  
-  if ( direction == 0 ){
-    if ( side == 0 ) tid_src = ( nx - 2*n_ghost + tid_k )  + (tid_i)*nx  + (tid_j)*nx*ny;
-    if ( side == 0 ) tid_dst = ( tid_k )                   + (tid_i)*nx  + (tid_j)*nx*ny;
-    if ( side == 1 ) tid_src = ( n_ghost + tid_k  )        + (tid_i)*nx  + (tid_j)*nx*ny;
-    if ( side == 1 ) tid_dst = ( nx - n_ghost + tid_k )    + (tid_i)*nx  + (tid_j)*nx*ny;
+  tid   = threadIdx.x + blockIdx.x * blockDim.x;
+  tid_k = tid / (n_i * n_j);
+  tid_j = (tid - tid_k * n_i * n_j) / n_i;
+  tid_i = tid - tid_k * n_i * n_j - tid_j * n_i;
 
+  if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) {
+    return;
   }
-  if ( direction == 1 ){
-    if ( side == 0 ) tid_src = (tid_i) + ( ny - 2*n_ghost + tid_k  )*nx  + (tid_j)*nx*ny;
-    if ( side == 0 ) tid_dst = (tid_i) + ( tid_k )*nx                    + (tid_j)*nx*ny;
-    if ( side == 1 ) tid_src = (tid_i) + ( n_ghost + tid_k  )*nx         + (tid_j)*nx*ny;
-    if ( side == 1 ) tid_dst = (tid_i) + ( ny - n_ghost + tid_k )*nx     + (tid_j)*nx*ny;
+
+  if (direction == 0) {
+    if (side == 0) {
+      tid_src = (nx - 2 * n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny;
+    }
+    if (side == 0) {
+      tid_dst = (tid_k) + (tid_i)*nx + (tid_j)*nx * ny;
+    }
+    if (side == 1) {
+      tid_src = (n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny;
+    }
+    if (side == 1) {
+      tid_dst = (nx - n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny;
+    }
   }
-  if ( direction == 2 ){
-    if ( side == 0 ) tid_src = (tid_i) + (tid_j)*nx + ( nz - 2*n_ghost + tid_k  )*nx*ny;
-    if ( side == 0 ) tid_dst = (tid_i) + (tid_j)*nx + ( tid_k  )*nx*ny;
-    if ( side == 1 ) tid_src = (tid_i) + (tid_j)*nx + ( n_ghost + tid_k  )*nx*ny;
-    if ( side == 1 ) tid_dst = (tid_i) + (tid_j)*nx + ( nz - n_ghost + tid_k  )*nx*ny;
+  if (direction == 1) {
+    if (side == 0) {
+      tid_src = (tid_i) + (ny - 2 * n_ghost + tid_k) * nx + (tid_j)*nx * ny;
+    }
+    if (side == 0) {
+      tid_dst = (tid_i) + (tid_k)*nx + (tid_j)*nx * ny;
+    }
+    if (side == 1) {
+      tid_src = (tid_i) + (n_ghost + tid_k) * nx + (tid_j)*nx * ny;
+    }
+    if (side == 1) {
+      tid_dst = (tid_i) + (ny - n_ghost + tid_k) * nx + (tid_j)*nx * ny;
+    }
   }
-  
+  if (direction == 2) {
+    if (side == 0) {
+      tid_src = (tid_i) + (tid_j)*nx + (nz - 2 * n_ghost + tid_k) * nx * ny;
+    }
+    if (side == 0) {
+      tid_dst = (tid_i) + (tid_j)*nx + (tid_k)*nx * ny;
+    }
+    if (side == 1) {
+      tid_src = (tid_i) + (tid_j)*nx + (n_ghost + tid_k) * nx * ny;
+    }
+    if (side == 1) {
+      tid_dst = (tid_i) + (tid_j)*nx + (nz - n_ghost + tid_k) * nx * ny;
+    }
+  }
+
   potential_d[tid_dst] = potential_d[tid_src];
-  
 }
 
-
-void Grid3D::Set_Potential_Boundaries_Periodic_GPU( int direction, int side, int *flags ){
-  
+void Grid3D::Set_Potential_Boundaries_Periodic_GPU(int direction, int side, int *flags)
+{
   int n_i, n_j, n_ghost, size;
   int nx_g, ny_g, nz_g;
   n_ghost = N_GHOST_POTENTIAL;
-  nx_g = Grav.nx_local + 2*n_ghost;
-  ny_g = Grav.ny_local + 2*n_ghost;
-  nz_g = Grav.nz_local + 2*n_ghost;
+  nx_g    = Grav.nx_local + 2 * n_ghost;
+  ny_g    = Grav.ny_local + 2 * n_ghost;
+  nz_g    = Grav.nz_local + 2 * n_ghost;
 
-  if ( direction == 0 ){
+  if (direction == 0) {
     n_i = ny_g;
     n_j = nz_g;
   }
-  if ( direction == 1 ){
+  if (direction == 1) {
     n_i = nx_g;
     n_j = nz_g;
   }
-  if ( direction == 2 ){
+  if (direction == 2) {
     n_i = nx_g;
     n_j = ny_g;
   }
@@ -165,66 +223,81 @@ void Grid3D::Set_Potential_Boundaries_Periodic_GPU( int direction, int side, int
   size = N_GHOST_POTENTIAL * n_i * n_j;
 
   // set values for GPU kernels
-  int ngrid = ( size - 1 ) / TPB_GRAV + 1;
+  int ngrid = (size - 1) / TPB_GRAV + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(ngrid, 1, 1);
   //  number of threads per 1D block
   dim3 dim1dBlock(TPB_GRAV, 1, 1);
 
   // Copy the potential boundary from buffer to potential array
-  hipLaunchKernelGGL( Set_Potential_Boundaries_Periodic_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, n_i, n_j, nx_g, ny_g, nz_g, n_ghost, Grav.F.potential_d );
-
-
+  hipLaunchKernelGGL(Set_Potential_Boundaries_Periodic_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, n_i, n_j,
+                     nx_g, ny_g, nz_g, n_ghost, Grav.F.potential_d);
 }
 
-__global__ void Load_Transfer_Buffer_GPU_kernel( int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d   ){
-
+__global__ void Load_Transfer_Buffer_GPU_kernel(int direction, int side, int size_buffer, int n_i, int n_j, int nx,
+                                                int ny, int nz, int n_ghost_transfer, int n_ghost_potential,
+                                                Real *potential_d, Real *transfer_buffer_d)
+{
   // get a global thread ID
   int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot;
-  tid = threadIdx.x + blockIdx.x * blockDim.x;
-  tid_k = tid / (n_i*n_j);
-  tid_j = (tid - tid_k*n_i*n_j) / n_i;
-  tid_i = tid - tid_k*n_i*n_j - tid_j*n_i;
+  tid   = threadIdx.x + blockIdx.x * blockDim.x;
+  tid_k = tid / (n_i * n_j);
+  tid_j = (tid - tid_k * n_i * n_j) / n_i;
+  tid_i = tid - tid_k * n_i * n_j - tid_j * n_i;
 
-  if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer ) return;
+  if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer) {
+    return;
+  }
 
-  tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j;
+  tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j;
 
-  if ( direction == 0 ){
-    if ( side == 0 ) tid_pot = ( n_ghost_potential + tid_k  )                        + (tid_i)*nx + (tid_j)*nx*ny;
-    if ( side == 1 ) tid_pot = ( nx - n_ghost_potential - n_ghost_transfer + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny;
+  if (direction == 0) {
+    if (side == 0) {
+      tid_pot = (n_ghost_potential + tid_k) + (tid_i)*nx + (tid_j)*nx * ny;
+    }
+    if (side == 1) {
+      tid_pot = (nx - n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx + (tid_j)*nx * ny;
+    }
   }
-  if ( direction == 1 ){
-    if ( side == 0 ) tid_pot = (tid_i) + ( n_ghost_potential + tid_k  )*nx                         + (tid_j)*nx*ny;
-    if ( side == 1 ) tid_pot = (tid_i) + ( ny - n_ghost_potential - n_ghost_transfer + tid_k  )*nx + (tid_j)*nx*ny;
+  if (direction == 1) {
+    if (side == 0) {
+      tid_pot = (tid_i) + (n_ghost_potential + tid_k) * nx + (tid_j)*nx * ny;
+    }
+    if (side == 1) {
+      tid_pot = (tid_i) + (ny - n_ghost_potential - n_ghost_transfer + tid_k) * nx + (tid_j)*nx * ny;
+    }
   }
-  if ( direction == 2 ){
-    if ( side == 0 ) tid_pot = (tid_i) + (tid_j)*nx + ( n_ghost_potential + tid_k  )*nx*ny;
-    if ( side == 1 ) tid_pot = (tid_i) + (tid_j)*nx + ( nz - n_ghost_potential - n_ghost_transfer + tid_k  )*nx*ny;
+  if (direction == 2) {
+    if (side == 0) {
+      tid_pot = (tid_i) + (tid_j)*nx + (n_ghost_potential + tid_k) * nx * ny;
+    }
+    if (side == 1) {
+      tid_pot = (tid_i) + (tid_j)*nx + (nz - n_ghost_potential - n_ghost_transfer + tid_k) * nx * ny;
+    }
   }
   transfer_buffer_d[tid_buffer] = potential_d[tid_pot];
-
 }
 
-int Grid3D::Load_Gravity_Potential_To_Buffer_GPU( int direction, int side, Real *buffer, int buffer_start  ){
-
+int Grid3D::Load_Gravity_Potential_To_Buffer_GPU(int direction, int side, Real *buffer, int buffer_start)
+{
   // printf( "Loading Gravity Buffer: Dir %d  side: %d \n", direction, side );
-  int nx_pot, ny_pot, nz_pot, size_buffer, n_ghost_potential, n_ghost_transfer, n_i, n_j, ngrid;;
+  int nx_pot, ny_pot, nz_pot, size_buffer, n_ghost_potential, n_ghost_transfer, n_i, n_j, ngrid;
+  ;
   n_ghost_potential = N_GHOST_POTENTIAL;
   n_ghost_transfer  = N_GHOST_POTENTIAL;
-  nx_pot = Grav.nx_local + 2*n_ghost_potential;
-  ny_pot = Grav.ny_local + 2*n_ghost_potential;
-  nz_pot = Grav.nz_local + 2*n_ghost_potential;
+  nx_pot            = Grav.nx_local + 2 * n_ghost_potential;
+  ny_pot            = Grav.ny_local + 2 * n_ghost_potential;
+  nz_pot            = Grav.nz_local + 2 * n_ghost_potential;
 
-  if ( direction == 0 ){
+  if (direction == 0) {
     n_i = ny_pot;
     n_j = nz_pot;
   }
-  if ( direction == 1 ){
+  if (direction == 1) {
     n_i = nx_pot;
     n_j = nz_pot;
   }
-  if ( direction == 2 ){
+  if (direction == 2) {
     n_i = nx_pot;
     n_j = ny_pot;
   }
@@ -232,7 +305,7 @@ int Grid3D::Load_Gravity_Potential_To_Buffer_GPU( int direction, int side, Real
   size_buffer = n_ghost_transfer * n_i * n_j;
 
   // set values for GPU kernels
-  ngrid = ( size_buffer - 1 ) / TPB_GRAV + 1;
+  ngrid = (size_buffer - 1) / TPB_GRAV + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(ngrid, 1, 1);
   //  number of threads per 1D block
@@ -244,61 +317,77 @@ int Grid3D::Load_Gravity_Potential_To_Buffer_GPU( int direction, int side, Real
   Real *send_buffer_d;
   send_buffer_d = buffer;
 
-  hipLaunchKernelGGL( Load_Transfer_Buffer_GPU_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j,  nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, send_buffer_d  );
-  CHECK(cudaDeviceSynchronize());
+  hipLaunchKernelGGL(Load_Transfer_Buffer_GPU_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i,
+                     n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, send_buffer_d);
+  GPU_Error_Check(cudaDeviceSynchronize());
 
   return size_buffer;
 }
 
-__global__ void Unload_Transfer_Buffer_GPU_kernel( int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d   ){
-
+__global__ void Unload_Transfer_Buffer_GPU_kernel(int direction, int side, int size_buffer, int n_i, int n_j, int nx,
+                                                  int ny, int nz, int n_ghost_transfer, int n_ghost_potential,
+                                                  Real *potential_d, Real *transfer_buffer_d)
+{
   // get a global thread ID
   int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot;
-  tid = threadIdx.x + blockIdx.x * blockDim.x;
-  tid_k = tid / (n_i*n_j);
-  tid_j = (tid - tid_k*n_i*n_j) / n_i;
-  tid_i = tid - tid_k*n_i*n_j - tid_j*n_i;
+  tid   = threadIdx.x + blockIdx.x * blockDim.x;
+  tid_k = tid / (n_i * n_j);
+  tid_j = (tid - tid_k * n_i * n_j) / n_i;
+  tid_i = tid - tid_k * n_i * n_j - tid_j * n_i;
 
-  if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer ) return;
+  if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer) {
+    return;
+  }
 
-  tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j;
+  tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j;
 
-  if ( direction == 0 ){
-    if ( side == 0 ) tid_pot = ( n_ghost_potential - n_ghost_transfer + tid_k  ) + (tid_i)*nx + (tid_j)*nx*ny;
-    if ( side == 1 ) tid_pot = ( nx - n_ghost_potential + tid_k )                + (tid_i)*nx + (tid_j)*nx*ny;
+  if (direction == 0) {
+    if (side == 0) {
+      tid_pot = (n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx + (tid_j)*nx * ny;
+    }
+    if (side == 1) {
+      tid_pot = (nx - n_ghost_potential + tid_k) + (tid_i)*nx + (tid_j)*nx * ny;
+    }
   }
-  if ( direction == 1 ){
-    if ( side == 0 ) tid_pot = (tid_i) + ( n_ghost_potential - n_ghost_transfer + tid_k  )*nx + (tid_j)*nx*ny;
-    if ( side == 1 ) tid_pot = (tid_i) + ( ny - n_ghost_potential + tid_k  )*nx               + (tid_j)*nx*ny;
+  if (direction == 1) {
+    if (side == 0) {
+      tid_pot = (tid_i) + (n_ghost_potential - n_ghost_transfer + tid_k) * nx + (tid_j)*nx * ny;
+    }
+    if (side == 1) {
+      tid_pot = (tid_i) + (ny - n_ghost_potential + tid_k) * nx + (tid_j)*nx * ny;
+    }
   }
-  if ( direction == 2 ){
-    if ( side == 0 ) tid_pot = (tid_i) + (tid_j)*nx + ( n_ghost_potential - n_ghost_transfer + tid_k  )*nx*ny;
-    if ( side == 1 ) tid_pot = (tid_i) + (tid_j)*nx + ( nz - n_ghost_potential + tid_k  )*nx*ny;
+  if (direction == 2) {
+    if (side == 0) {
+      tid_pot = (tid_i) + (tid_j)*nx + (n_ghost_potential - n_ghost_transfer + tid_k) * nx * ny;
+    }
+    if (side == 1) {
+      tid_pot = (tid_i) + (tid_j)*nx + (nz - n_ghost_potential + tid_k) * nx * ny;
+    }
   }
   potential_d[tid_pot] = transfer_buffer_d[tid_buffer];
-
 }
 
-
-void Grid3D::Unload_Gravity_Potential_from_Buffer_GPU( int direction, int side, Real *buffer, int buffer_start  ){
-
+void Grid3D::Unload_Gravity_Potential_from_Buffer_GPU(int direction, int side, Real *buffer, int buffer_start)
+{
   // printf( "Loading Gravity Buffer: Dir %d  side: %d \n", direction, side );
-  int nx_pot, ny_pot, nz_pot, size_buffer, n_ghost_potential, n_ghost_transfer, n_i, n_j, ngrid;;
+  int nx_pot, ny_pot, nz_pot, size_buffer, n_ghost_potential, n_ghost_transfer, n_i, n_j, ngrid;
+  ;
   n_ghost_potential = N_GHOST_POTENTIAL;
   n_ghost_transfer  = N_GHOST_POTENTIAL;
-  nx_pot = Grav.nx_local + 2*n_ghost_potential;
-  ny_pot = Grav.ny_local + 2*n_ghost_potential;
-  nz_pot = Grav.nz_local + 2*n_ghost_potential;
+  nx_pot            = Grav.nx_local + 2 * n_ghost_potential;
+  ny_pot            = Grav.ny_local + 2 * n_ghost_potential;
+  nz_pot            = Grav.nz_local + 2 * n_ghost_potential;
 
-  if ( direction == 0 ){
+  if (direction == 0) {
     n_i = ny_pot;
     n_j = nz_pot;
   }
-  if ( direction == 1 ){
+  if (direction == 1) {
     n_i = nx_pot;
     n_j = nz_pot;
   }
-  if ( direction == 2 ){
+  if (direction == 2) {
     n_i = nx_pot;
     n_j = ny_pot;
   }
@@ -306,7 +395,7 @@ void Grid3D::Unload_Gravity_Potential_from_Buffer_GPU( int direction, int side,
   size_buffer = n_ghost_transfer * n_i * n_j;
 
   // set values for GPU kernels
-  ngrid = ( size_buffer - 1 ) / TPB_GRAV + 1;
+  ngrid = (size_buffer - 1) / TPB_GRAV + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(ngrid, 1, 1);
   //  number of threads per 1D block
@@ -318,9 +407,8 @@ void Grid3D::Unload_Gravity_Potential_from_Buffer_GPU( int direction, int side,
   Real *recv_buffer_d;
   recv_buffer_d = buffer;
 
-  hipLaunchKernelGGL( Unload_Transfer_Buffer_GPU_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j,  nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, recv_buffer_d  );
-
+  hipLaunchKernelGGL(Unload_Transfer_Buffer_GPU_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i,
+                     n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, recv_buffer_d);
 }
 
-
-#endif //GRAVITY
+#endif  // GRAVITY
diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp
index ed5b0ba87..744f55825 100644
--- a/src/gravity/gravity_functions.cpp
+++ b/src/gravity/gravity_functions.cpp
@@ -1,31 +1,29 @@
 #ifdef GRAVITY
 
-#include "../grid/grid3D.h"
-#include "../global/global.h"
-#include "../io/io.h"
-#include "../utils/error_handling.h"
-#include <cstring>
+  #include <cstring>
 
-#ifdef CUDA
-#include "../mpi/cuda_mpi_routines.h"
-#endif
+  #include "../global/global.h"
+  #include "../grid/grid3D.h"
+  #include "../io/io.h"
+  #include "../mpi/cuda_mpi_routines.h"
+  #include "../utils/error_handling.h"
 
-#ifdef PARALLEL_OMP
-#include "../utils/parallel_omp.h"
-#endif
-
-#if defined(PARIS_TEST) || defined(PARIS_GALACTIC_TEST)
-#include <vector>
-#endif
+  #ifdef PARALLEL_OMP
+    #include "../utils/parallel_omp.h"
+  #endif
 
-#ifdef PARTICLES
-#include "../model/disk_galaxy.h"
-#endif
+  #if defined(PARIS_TEST) || defined(PARIS_GALACTIC_TEST)
+    #include <vector>
+  #endif
 
-//Set delta_t when using gravity
-void Grid3D::set_dt_Gravity(){
+  // #ifdef PARTICLES
+  #include "../model/disk_galaxy.h"
+// #endif
 
-  //Delta_t for the hydro
+// Set delta_t when usi#ng gravity
+void Grid3D::set_dt_Gravity()
+{
+  // Delta_t for the hydro
   Real dt_hydro = H.dt;
 
   #ifdef AVERAGE_SLOW_CELLS
@@ -33,178 +31,182 @@ void Grid3D::set_dt_Gravity(){
   #endif
 
   #ifdef PARTICLES
-  //Compute delta_t for particles and choose min(dt_particles, dt_hydro)
+  // Compute delta_t for particles and choose min(dt_particles, dt_hydro)
   Real dt_particles, dt_min;
 
-  #ifdef COSMOLOGY
-  chprintf( "Current_z: %f \n", Cosmo.current_z );
+    #ifdef COSMOLOGY
+  chprintf("Current_z: %f \n", Cosmo.current_z);
   Real da_particles, da_min, dt_physical;
 
-  //Compute the particles delta_t
+  // Compute the particles delta_t
   Particles.dt = Calc_Particles_dt_Cosmo();
   dt_particles = Particles.dt;
-  //Convert delta_t to delta_a ( a = scale factor )
-  da_particles = Cosmo.Get_da_from_dt( dt_particles );
-  da_particles = fmin( da_particles, 1.0 ); //Limit delta_a
+  // Convert delta_t to delta_a ( a = scale factor )
+  da_particles = Cosmo.Get_da_from_dt(dt_particles);
+  da_particles = fmin(da_particles, 1.0);  // Limit delta_a
 
-  #ifdef ONLY_PARTICLES
-  //If only particles da_min is only da_particles
+      #ifdef ONLY_PARTICLES
+  // If only particles da_min is only da_particles
   da_min = da_particles;
-  chprintf( " Delta_a_particles: %f \n", da_particles );
+  chprintf(" Delta_a_particles: %f \n", da_particles);
 
-  #else //NOT ONLY_PARTICLES
-  //Here da_min is the minumum between da_particles and da_hydro
+      #else  // NOT ONLY_PARTICLES
+  // Here da_min is the minumum between da_particles and da_hydro
   Real da_hydro;
-  da_hydro = Cosmo.Get_da_from_dt( dt_hydro ) * Cosmo.current_a * Cosmo.current_a / Cosmo.H0; //Convet delta_t to delta_a
-  da_min = fmin( da_hydro, da_particles ); //Find the minumum delta_a
-  chprintf( " Delta_a_particles: %f      Delta_a_gas: %f   \n", da_particles, da_hydro );
+  da_hydro =
+      Cosmo.Get_da_from_dt(dt_hydro) * Cosmo.current_a * Cosmo.current_a / Cosmo.H0;  // Convet delta_t to delta_a
+  da_min = fmin(da_hydro, da_particles);                                              // Find the minumum delta_a
+  chprintf(" Delta_a_particles: %f      Delta_a_gas: %f   \n", da_particles, da_hydro);
 
-  #endif//ONLY_PARTICLES
+      #endif  // ONLY_PARTICLES
 
-  //Limit delta_a by the expansion rate
-  Cosmo.max_delta_a = fmin( MAX_EXPANSION_RATE * Cosmo.current_a, MAX_DELTA_A );
-  if( da_min > Cosmo.max_delta_a){
+  // Limit delta_a by the expansion rate
+  Cosmo.max_delta_a = fmin(MAX_EXPANSION_RATE * Cosmo.current_a, MAX_DELTA_A);
+  if (da_min > Cosmo.max_delta_a) {
     da_min = Cosmo.max_delta_a;
-    chprintf( " Seting max delta_a: %f\n", da_min );
+    chprintf(" Seting max delta_a: %f\n", da_min);
   }
 
-  //Small delta_a when reionization starts
-  #ifdef COOLING_GRACKLE
-  if ( fabs(Cosmo.current_a + da_min - Cool.scale_factor_UVB_on) < 0.005 ){
+      // Small delta_a when reionization starts
+      #ifdef COOLING_GRACKLE
+  if (fabs(Cosmo.current_a + da_min - Cool.scale_factor_UVB_on) < 0.005) {
     da_min /= 2;
-    chprintf( " Starting UVB. Limiting delta_a:  %f \n", da_min);
+    chprintf(" Starting UVB. Limiting delta_a:  %f \n", da_min);
   }
-  #endif
-  #ifdef CHEMISTRY_GPU
-  if ( fabs(Cosmo.current_a + da_min - Chem.scale_factor_UVB_on) < 0.005 ){
+      #endif
+      #ifdef CHEMISTRY_GPU
+  if (fabs(Cosmo.current_a + da_min - Chem.scale_factor_UVB_on) < 0.005) {
     da_min /= 2;
-    chprintf( " Starting UVB. Limiting delta_a:  %f \n", da_min);
+    chprintf(" Starting UVB. Limiting delta_a:  %f \n", da_min);
   }
-  #endif
-    
-  //Limit delta_a if it's time to output
-  if ( (Cosmo.current_a + da_min) >  Cosmo.next_output ){
-    da_min = Cosmo.next_output - Cosmo.current_a;
+      #endif
+
+  // Limit delta_a if it's time to output
+  if ((Cosmo.current_a + da_min) > Cosmo.next_output) {
+    da_min       = Cosmo.next_output - Cosmo.current_a;
     H.Output_Now = true;
   }
 
-  #ifdef ANALYSIS
-  //Limit delta_a if it's time to run analysis
-  if( Analysis.next_output_indx < Analysis.n_outputs ){
-    if ( H.Output_Now && fabs(Cosmo.current_a + da_min  - Analysis.next_output ) < 1e-6 )  Analysis.Output_Now = true;
-    else if ( Cosmo.current_a + da_min  >  Analysis.next_output ){
-      da_min = Analysis.next_output - Cosmo.current_a;
+      #ifdef ANALYSIS
+  // Limit delta_a if it's time to run analysis
+  if (Analysis.next_output_indx < Analysis.n_outputs) {
+    if (H.Output_Now && fabs(Cosmo.current_a + da_min - Analysis.next_output) < 1e-6)
+      Analysis.Output_Now = true;
+    else if (Cosmo.current_a + da_min > Analysis.next_output) {
+      da_min              = Analysis.next_output - Cosmo.current_a;
       Analysis.Output_Now = true;
     }
   }
-  #endif
-  
-  if ( da_min < 0 ){
-    chprintf( "ERROR: Negative delta_a");
+      #endif
+
+  if (da_min < 0) {
+    chprintf("ERROR: Negative delta_a");
     exit(-1);
-  } 
-  
-  
-  //Set delta_a after it has been computed
+  }
+
+  // Set delta_a after it has been computed
   Cosmo.delta_a = da_min;
-  //Convert delta_a back to delta_t
-  dt_min = Cosmo.Get_dt_from_da( Cosmo.delta_a ) * Cosmo.H0 / ( Cosmo.current_a * Cosmo.current_a );
-  //Set the new delta_t for the hydro step
+  // Convert delta_a back to delta_t
+  dt_min = Cosmo.Get_dt_from_da(Cosmo.delta_a) * Cosmo.H0 / (Cosmo.current_a * Cosmo.current_a);
+  // Set the new delta_t for the hydro step
   H.dt = dt_min;
-  chprintf( " Current_a: %f    delta_a: %f     dt:  %f\n", Cosmo.current_a, Cosmo.delta_a, H.dt  );
+  chprintf(" Current_a: %f    delta_a: %f     dt:  %f\n", Cosmo.current_a, Cosmo.delta_a, H.dt);
 
-  #ifdef AVERAGE_SLOW_CELLS
-  //Set the min_delta_t for averaging a slow cell
-  da_particles = fmin( da_particles, Cosmo.max_delta_a );
-  min_dt_slow = Cosmo.Get_dt_from_da( da_particles ) / Particles.C_cfl * Cosmo.H0 / ( Cosmo.current_a * Cosmo.current_a ) / SLOW_FACTOR;
+      #ifdef AVERAGE_SLOW_CELLS
+  // Set the min_delta_t for averaging a slow cell
+  da_particles = fmin(da_particles, Cosmo.max_delta_a);
+  min_dt_slow  = Cosmo.Get_dt_from_da(da_particles) / Particles.C_cfl * Cosmo.H0 / (Cosmo.current_a * Cosmo.current_a) /
+                SLOW_FACTOR;
   H.min_dt_slow = min_dt_slow;
-  #endif
+      #endif
 
-  //Compute the physical time
-  dt_physical = Cosmo.Get_dt_from_da( Cosmo.delta_a );
+  // Compute the physical time
+  dt_physical   = Cosmo.Get_dt_from_da(Cosmo.delta_a);
   Cosmo.dt_secs = dt_physical * Cosmo.time_conversion;
   Cosmo.t_secs += Cosmo.dt_secs;
-  chprintf( " t_physical: %f Myr   dt_physical: %f Myr\n", Cosmo.t_secs/MYR, Cosmo.dt_secs/MYR );
+  chprintf(" t_physical: %f Myr   dt_physical: %f Myr\n", Cosmo.t_secs / MYR, Cosmo.dt_secs / MYR);
   Particles.dt = dt_physical;
 
-  #else // Not Cosmology
-  //If NOT using COSMOLOGY
+    #else  // Not Cosmology
+  // If NOT using COSMOLOGY
 
-  //Compute the particles delta_t
+  // Compute the particles delta_t
   dt_particles = Calc_Particles_dt();
-  dt_particles = fmin( dt_particles, Particles.max_dt);
-  #ifdef ONLY_PARTICLES
+  dt_particles = fmin(dt_particles, Particles.max_dt);
+      #ifdef ONLY_PARTICLES
   dt_min = dt_particles;
-  chprintf( " dt_particles: %f \n", dt_particles );
-  #else
-  chprintf( " dt_hydro: %f   dt_particles: %f \n", dt_hydro, dt_particles );
-  //Get the minimum delta_t between hydro and particles
-  dt_min = fmin( dt_hydro, dt_particles );
-  #endif//ONLY_PARTICLES
-
-  #ifdef AVERAGE_SLOW_CELLS
-  //Set the min_delta_t for averaging a slow cell
-  min_dt_slow = dt_particles / Particles.C_cfl / SLOW_FACTOR;
+  chprintf(" dt_particles: %f \n", dt_particles);
+      #else
+  chprintf(" dt_hydro: %f   dt_particles: %f \n", dt_hydro, dt_particles);
+  // Get the minimum delta_t between hydro and particles
+  dt_min = fmin(dt_hydro, dt_particles);
+      #endif  // ONLY_PARTICLES
+
+      #ifdef AVERAGE_SLOW_CELLS
+  // Set the min_delta_t for averaging a slow cell
+  // min_dt_slow = dt_particles / Particles.C_cfl / SLOW_FACTOR;
+  min_dt_slow   = 3 * H.dx;
   H.min_dt_slow = min_dt_slow;
-  #endif
+      #endif
 
-  //Set the new delta_t
-  H.dt = dt_min;
+  // Set the new delta_t
+  H.dt         = dt_min;
   Particles.dt = H.dt;
-  #endif//COSMOLOGY
-  #endif//PARTICLES
-
-  #if defined( AVERAGE_SLOW_CELLS) && !defined( PARTICLES )
-  //Set the min_delta_t for averaging a slow cell ( for now the min_dt_slow is set to a large value, change this with your condition )
-  min_dt_slow = H.dt / C_cfl * 100 ;
+    #endif  // COSMOLOGY
+  #endif    // PARTICLES
+
+  #if defined(AVERAGE_SLOW_CELLS) && !defined(PARTICLES)
+  // Set the min_delta_t for averaging a slow cell ( for now the min_dt_slow is
+  // set to a large value, change this with your condition ) min_dt_slow = H.dt
+  // / C_cfl * 100 ;
+  min_dt_slow   = 3 * H.dx;
   H.min_dt_slow = min_dt_slow;
   #endif
 
   // Set current and previous delta_t for the potential extrapolation
-  if ( Grav.INITIAL ){
+  if (Grav.INITIAL) {
     Grav.dt_prev = H.dt;
-    Grav.dt_now = H.dt;
-  }else{
+    Grav.dt_now  = H.dt;
+  } else {
     Grav.dt_prev = Grav.dt_now;
-    Grav.dt_now = H.dt;
+    Grav.dt_now  = H.dt;
   }
-  
+
   #if defined(PARTICLES_GPU) && defined(PRINT_MAX_MEMORY_USAGE)
   Particles.Print_Max_Memory_Usage();
   #endif
 }
 
-//NOT USED: Get Average density on the Global dommain
-Real Grav3D::Get_Average_Density(){
-
+// NOT USED: Get Average density on the Global dommain
+Real Grav3D::Get_Average_Density()
+{
   Real dens_sum, dens_mean;
 
   #ifndef PARALLEL_OMP
-  dens_sum = Get_Average_Density_function( 0, nz_local );
+  dens_sum = Get_Average_Density_function(0, nz_local);
   #else
   dens_sum = 0;
   Real dens_sum_all[N_OMP_THREADS];
-  #pragma omp parallel num_threads( N_OMP_THREADS )
+    #pragma omp parallel num_threads(N_OMP_THREADS)
   {
     int omp_id, n_omp_procs;
     int g_start, g_end;
 
-    omp_id = omp_get_thread_num();
+    omp_id      = omp_get_thread_num();
     n_omp_procs = omp_get_num_threads();
-    Get_OMP_Grid_Indxs( nz_local, n_omp_procs, omp_id, &g_start, &g_end  );
-    dens_sum_all[omp_id] = Get_Average_Density_function(  g_start, g_end );
-
+    Get_OMP_Grid_Indxs(nz_local, n_omp_procs, omp_id, &g_start, &g_end);
+    dens_sum_all[omp_id] = Get_Average_Density_function(g_start, g_end);
   }
-  for ( int i=0; i<N_OMP_THREADS; i++ ){
-    dens_sum += dens_sum_all[i];
+  for (Real dens_sum_all_element : dens_sum_all) {
+    dens_sum += dens_sum_all_element;
   }
   #endif
 
-  dens_mean = dens_sum /  ( nx_local * ny_local * nz_local);
+  dens_mean = dens_sum / (nx_local * ny_local * nz_local);
 
   Real dens_avrg_all;
   #ifdef MPI_CHOLLA
-  dens_avrg_all = ReduceRealAvg( dens_mean );
+  dens_avrg_all = ReduceRealAvg(dens_mean);
   #else
   dens_avrg_all = dens_mean;
   #endif
@@ -212,21 +214,20 @@ Real Grav3D::Get_Average_Density(){
   dens_avrg = dens_avrg_all;
 
   return dens_avrg_all;
-
 }
 
-//NOT USED: Function to get Average density on the Global dommain
-Real Grav3D::Get_Average_Density_function( int g_start, int g_end){
-
+// NOT USED: Function to get Average density on the Global dommain
+Real Grav3D::Get_Average_Density_function(int g_start, int g_end)
+{
   int nx = nx_local;
   int ny = ny_local;
   int nz = nz_local;
   int k, j, i, id;
-  Real dens_sum=0;
-  for( k=g_start; k<g_end; k++){
-    for( j=0; j<ny; j++){
-      for( i=0; i<nx; i++){
-        id = (i) + (j)*nx + (k)*nx*ny;
+  Real dens_sum = 0;
+  for (k = g_start; k < g_end; k++) {
+    for (j = 0; j < ny; j++) {
+      for (i = 0; i < nx; i++) {
+        id = (i) + (j)*nx + (k)*nx * ny;
         dens_sum += F.density_h[id];
       }
     }
@@ -234,91 +235,91 @@ Real Grav3D::Get_Average_Density_function( int g_start, int g_end){
   return dens_sum;
 }
 
-#ifdef PARIS_TEST
+  #ifdef PARIS_TEST
 
-static inline Real sqr(const Real x) { return x*x; }
+static inline Real sqr(const Real x) { return x * x; }
 
-static inline Real f1(const Real x)
-{
-  return exp(-10.0*sqr(2.0*x-1.0))*sin(8.0*M_PI*x);
-}
+static inline Real f1(const Real x) { return exp(-10.0 * sqr(2.0 * x - 1.0)) * sin(8.0 * M_PI * x); }
 
 static inline Real d1(const Real x)
 {
-  return 16.0*exp(-10.0*sqr(2.0*x-1.0))*((400.0*x*x-400.0*x-4.0*M_PI*M_PI+95.0)*sin(8.0*M_PI*x)+(40.0*M_PI-80.0*M_PI*x)*cos(8.0*M_PI*x));
+  return 16.0 * exp(-10.0 * sqr(2.0 * x - 1.0)) *
+         ((400.0 * x * x - 400.0 * x - 4.0 * M_PI * M_PI + 95.0) * sin(8.0 * M_PI * x) +
+          (40.0 * M_PI - 80.0 * M_PI * x) * cos(8.0 * M_PI * x));
 }
 
-static inline Real periodicF(const Real x, const Real y, const Real z)
-{
-  return f1(x)*f1(y)*f1(z);
-}
+static inline Real periodicF(const Real x, const Real y, const Real z) { return f1(x) * f1(y) * f1(z); }
 
-static inline Real periodicD(const Real x, const Real y, const Real z, const Real ddlx, const Real ddly, const Real ddlz)
+static inline Real periodicD(const Real x, const Real y, const Real z, const Real ddlx, const Real ddly,
+                             const Real ddlz)
 {
-  return ddlx*d1(x)*f1(y)*f1(z)+ddly*f1(x)*d1(y)*f1(z)+ddlz*f1(x)*f1(y)*d1(z);
+  return ddlx * d1(x) * f1(y) * f1(z) + ddly * f1(x) * d1(y) * f1(z) + ddlz * f1(x) * f1(y) * d1(z);
 }
 
-static constexpr Real twoPi = 2.0*M_PI;
-static constexpr Real fourPi = 4.0*M_PI;
-static constexpr Real sixPi2 = 6.0*M_PI*M_PI;
+static constexpr Real twoPi  = 2.0 * M_PI;
+static constexpr Real fourPi = 4.0 * M_PI;
+static constexpr Real sixPi2 = 6.0 * M_PI * M_PI;
 
 static inline Real nonzeroF(const Real x, const Real y, const Real z)
 {
-  const Real sx = sin(twoPi*x);
-  const Real sy = sin(twoPi*y);
-  const Real sz = sin(twoPi*z);
-  const Real f = exp(-x*x-y*y-z*z);
-  return sx*sx*sx*sy*sy*sy*sz*sz*sz+f;
+  const Real sx = sin(twoPi * x);
+  const Real sy = sin(twoPi * y);
+  const Real sz = sin(twoPi * z);
+  const Real f  = exp(-x * x - y * y - z * z);
+  return sx * sx * sx * sy * sy * sy * sz * sz * sz + f;
 }
 
 static inline Real nonzeroD(const Real x, const Real y, const Real z, const Real ddlx, const Real ddly, const Real ddlz)
 {
-  const Real sx = sin(twoPi*x);
-  const Real sy = sin(twoPi*y);
-  const Real sz = sin(twoPi*z);
-  const Real sx3 = sx*sx*sx;
-  const Real sy3 = sy*sy*sy;
-  const Real sz3 = sz*sz*sz;
-  const Real f = exp(-x*x-y*y-z*z);
-  const Real df = ddlx*(4.0*x*x-2.0)+ddly*(4.0*y*y-2.0)+ddlz*(4.0*z*z-2.0);
-  return (ddlx*sx*(3.0*cos(fourPi*x)+1.0)*sy3*sz3
-          +ddly*sx3*sy*(3.0*cos(fourPi*y)+1.0)*sz3
-          +ddlz*sx3*sy3*sz*(3.0*cos(fourPi*z)+1.0))*sixPi2+f*df;
+  const Real sx  = sin(twoPi * x);
+  const Real sy  = sin(twoPi * y);
+  const Real sz  = sin(twoPi * z);
+  const Real sx3 = sx * sx * sx;
+  const Real sy3 = sy * sy * sy;
+  const Real sz3 = sz * sz * sz;
+  const Real f   = exp(-x * x - y * y - z * z);
+  const Real df  = ddlx * (4.0 * x * x - 2.0) + ddly * (4.0 * y * y - 2.0) + ddlz * (4.0 * z * z - 2.0);
+  return (ddlx * sx * (3.0 * cos(fourPi * x) + 1.0) * sy3 * sz3 +
+          ddly * sx3 * sy * (3.0 * cos(fourPi * y) + 1.0) * sz3 +
+          ddlz * sx3 * sy3 * sz * (3.0 * cos(fourPi * z) + 1.0)) *
+             sixPi2 +
+         f * df;
 }
-#endif
-
+  #endif
 
-#if defined(PARIS_TEST) || defined(PARIS_GALACTIC_TEST)
-static void printDiff(const Real *p, const Real *q, const int nx, const int ny, const int nz, const int ng = N_GHOST_POTENTIAL, const bool plot = false)
+  #if defined(PARIS_TEST) || defined(PARIS_GALACTIC_TEST)
+static void printDiff(const Real *p, const Real *q, const int nx, const int ny, const int nz,
+                      const int ng = N_GHOST_POTENTIAL, const bool plot = false)
 {
   Real dMax = 0, dSum = 0, dSum2 = 0;
   Real qMax = 0, qSum = 0, qSum2 = 0;
-  #pragma omp parallel for reduction(max:dMax,qMax) reduction(+:dSum,dSum2,qSum,qSum2)
+    #pragma omp parallel for reduction(max : dMax, qMax) reduction(+ : dSum, dSum2, qSum, qSum2)
   for (int k = 0; k < nz; k++) {
     for (int j = 0; j < ny; j++) {
       for (int i = 0; i < nx; i++) {
-        const long ijk = i+ng+(nx+ng+ng)*(j+ng+(ny+ng+ng)*(k+ng));
+        const long ijk  = i + ng + (nx + ng + ng) * (j + ng + (ny + ng + ng) * (k + ng));
         const Real qAbs = fabs(q[ijk]);
-        qMax = std::max(qMax,qAbs);
+        qMax            = std::max(qMax, qAbs);
         qSum += qAbs;
-        qSum2 += qAbs*qAbs;
-        const Real d = fabs(q[ijk]-p[ijk]);
-        dMax = std::max(dMax,d);
+        qSum2 += qAbs * qAbs;
+        const Real d = fabs(q[ijk] - p[ijk]);
+        dMax         = std::max(dMax, d);
         dSum += d;
-        dSum2 += d*d;
+        dSum2 += d * d;
       }
     }
   }
-  Real maxs[2] = {qMax,dMax};
-  Real sums[4] = {qSum,qSum2,dSum,dSum2};
-  MPI_Allreduce(MPI_IN_PLACE,&maxs,2,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD);
-  MPI_Allreduce(MPI_IN_PLACE,&sums,4,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
-  chprintf(" Poisson-Solver Diff: L1 %g L2 %g Linf %g\n",sums[2]/sums[0],sqrt(sums[3]/sums[1]),maxs[1]/maxs[0]);
+  Real maxs[2] = {qMax, dMax};
+  Real sums[4] = {qSum, qSum2, dSum, dSum2};
+  MPI_Allreduce(MPI_IN_PLACE, &maxs, 2, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+  MPI_Allreduce(MPI_IN_PLACE, &sums, 4, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  chprintf(" Poisson-Solver Diff: L1 %g L2 %g Linf %g\n", sums[2] / sums[0], sqrt(sums[3] / sums[1]),
+           maxs[1] / maxs[0]);
   fflush(stdout);
   if (!plot) return;
 
   printf("###\n");
-  #if 0
+    #if 0
   int kMax = -1;
   for (int k = 0; k < nz; k++) {
     for (int j = 0; j < ny; j++) {
@@ -329,414 +330,475 @@ static void printDiff(const Real *p, const Real *q, const int nx, const int ny,
       }
     }
     if (kMax > -1) {
-  #endif
-      const int k = nz/2;
-      for (int j = 0; j < ny+ng+ng; j++) {
-        for (int i = 0; i < nx+ng+ng; i++) {
-          const long ijk = i+(nx+ng+ng)*(j+(ny+ng+ng)*(k+ng));
-          printf("%d %d %g %g %g\n",j,i,q[ijk],p[ijk],q[ijk]-p[ijk]);
-        }
-        printf("\n");
-      }
-  #if 0
+    #endif
+  const int k = nz / 2;
+  for (int j = 0; j < ny + ng + ng; j++) {
+    for (int i = 0; i < nx + ng + ng; i++) {
+      const long ijk = i + (nx + ng + ng) * (j + (ny + ng + ng) * (k + ng));
+      printf("%d %d %g %g %g\n", j, i, q[ijk], p[ijk], q[ijk] - p[ijk]);
+    }
+    printf("\n");
+  }
+    #if 0
       break;
     }
   }
-  #endif
+    #endif
   fflush(stdout);
   MPI_Finalize();
   exit(0);
 }
-#endif
-
-
+  #endif
 
-//Initialize the Grav Object at the beginning of the simulation
-void Grid3D::Initialize_Gravity( struct parameters *P ){
-  chprintf( "\nInitializing Gravity... \n");
-  Grav.Initialize( H.xblocal, H.yblocal, H.zblocal, H.xblocal_max, H.yblocal_max, H.zblocal_max, H.xdglobal, H.ydglobal, H.zdglobal, P->nx, P->ny, P->nz, H.nx_real, H.ny_real, H.nz_real, H.dx, H.dy, H.dz, H.n_ghost_potential_offset, P  );
-  chprintf( "Gravity Successfully Initialized. \n\n");
+// Initialize the Grav Object at the beginning of the simulation
+void Grid3D::Initialize_Gravity(struct Parameters *P)
+{
+  chprintf("\nInitializing Gravity... \n");
+  Grav.Initialize(H.xblocal, H.yblocal, H.zblocal, H.xblocal_max, H.yblocal_max, H.zblocal_max, H.xdglobal, H.ydglobal,
+                  H.zdglobal, P->nx, P->ny, P->nz, H.nx_real, H.ny_real, H.nz_real, H.dx, H.dy, H.dz,
+                  H.n_ghost_potential_offset, P);
+  chprintf("Gravity Successfully Initialized. \n\n");
 
   if (P->bc_potential_type == 1) {
+    const int ng    = N_GHOST_POTENTIAL;
+    const int twoNG = ng + ng;
+    const int nk    = Grav.nz_local + twoNG;
+    const int nj    = Grav.ny_local + twoNG;
+    const int ni    = Grav.nx_local + twoNG;
+    const Real dr   = 0.5 - ng;
 
-    const int ng = N_GHOST_POTENTIAL;
-    const int twoNG = ng+ng;
-    const int nk = Grav.nz_local+twoNG;
-    const int nj = Grav.ny_local+twoNG;
-    const int ni = Grav.nx_local+twoNG;
-    const Real dr = 0.5-ng;
-
-    #ifdef PARIS_GALACTIC_TEST
+  #ifdef PARIS_GALACTIC_TEST
     chprintf("Analytic Test of Poisson Solvers:\n");
     std::vector<Real> exact(Grav.n_cells_potential);
     std::vector<Real> potential(Grav.n_cells_potential);
-    const Real scale = 4.0*M_PI*Grav.Gconst;
-    const Real ddx = 1.0/(scale*Grav.dx*Grav.dx);
-    const Real ddy = 1.0/(scale*Grav.dy*Grav.dy);
-    const Real ddz = 1.0/(scale*Grav.dz*Grav.dz);
+    const Real scale      = 4.0 * M_PI * Grav.Gconst;
+    const Real ddx        = 1.0 / (scale * Grav.dx * Grav.dx);
+    const Real ddy        = 1.0 / (scale * Grav.dy * Grav.dy);
+    const Real ddz        = 1.0 / (scale * Grav.dz * Grav.dz);
     const Real *const phi = Grav.F.potential_h;
-    const int nij = ni*nj;
-    const Real a0 = Galaxies::MW.phi_disk_D3D(0,0);
-    const Real da0 = 2.0/(25.0*scale);
+    const int nij         = ni * nj;
+    const Real a0         = galaxies::MW.phi_disk_D3D(0, 0);
+    const Real da0        = 2.0 / (25.0 * scale);
     #pragma omp parallel for
     for (int k = 0; k < nk; k++) {
-      const Real z = Grav.zMin+Grav.dz*(k+dr);
-      const int njk = nj*k;
+      const Real z  = Grav.zMin + Grav.dz * (k + dr);
+      const int njk = nj * k;
       for (int j = 0; j < nj; j++) {
-        const Real y = Grav.yMin+Grav.dy*(j+dr);
-        const Real yy = y*y;
-        const int nijk = ni*(j+njk);
+        const Real y   = Grav.yMin + Grav.dy * (j + dr);
+        const Real yy  = y * y;
+        const int nijk = ni * (j + njk);
         for (int i = 0; i < ni; i++) {
-          const Real x = Grav.xMin+Grav.dx*(i+dr);
-          const Real r = sqrt(x*x+yy);
-          const int ijk = i+nijk;
-          exact[ijk] = potential[ijk] = Grav.F.potential_h[ijk] = Galaxies::MW.phi_disk_D3D(r,z);
+          const Real x  = Grav.xMin + Grav.dx * (i + dr);
+          const Real r  = sqrt(x * x + yy);
+          const int ijk = i + nijk;
+          exact[ijk] = potential[ijk] = Grav.F.potential_h[ijk] = galaxies::MW.phi_disk_D3D(r, z);
         }
       }
     }
     #pragma omp parallel for
     for (int k = 0; k < Grav.nz_local; k++) {
-      const Real z = Grav.zMin+Grav.dz*(k+0.5);
-      const Real zz = z*z;
-      const int njk = Grav.ny_local*k;
+      const Real z  = Grav.zMin + Grav.dz * (k + 0.5);
+      const Real zz = z * z;
+      const int njk = Grav.ny_local * k;
       for (int j = 0; j < Grav.ny_local; j++) {
-        const Real y = Grav.yMin+Grav.dy*(j+0.5);
-        const Real yy = y*y;
-        const int nijk = Grav.nx_local*(j+njk);
+        const Real y   = Grav.yMin + Grav.dy * (j + 0.5);
+        const Real yy  = y * y;
+        const int nijk = Grav.nx_local * (j + njk);
         for (int i = 0; i < Grav.nx_local; i++) {
-          const Real x = Grav.xMin+Grav.dx*(i+0.5);
-          const Real r = sqrt(x*x+yy);
-          const int ijk = i+nijk;
-          const Real rr = x*x+yy+zz;
-          const Real f = a0*exp(-0.2*rr);
-          const Real df = da0*(15.0-2.0*rr)*f;
-          Grav.F.density_h[ijk] = Galaxies::MW.rho_disk_D3D(r,z)+df;
-          const int ib = i+ng+ni*(j+ng+nj*(k+ng));
+          const Real x          = Grav.xMin + Grav.dx * (i + 0.5);
+          const Real r          = sqrt(x * x + yy);
+          const int ijk         = i + nijk;
+          const Real rr         = x * x + yy + zz;
+          const Real f          = a0 * exp(-0.2 * rr);
+          const Real df         = da0 * (15.0 - 2.0 * rr) * f;
+          Grav.F.density_h[ijk] = galaxies::MW.rho_disk_D3D(r, z) + df;
+          const int ib          = i + ng + ni * (j + ng + nj * (k + ng));
           exact[ib] -= f;
         }
       }
     }
-    Grav.Poisson_solver_test.Get_Potential(Grav.F.density_h,Grav.F.potential_h,Grav.Gconst,Galaxies::MW);
+    Grav.Poisson_solver_test.Get_Potential(Grav.F.density_h, Grav.F.potential_h, Grav.Gconst, galaxies::MW);
     chprintf(" Paris Galactic");
-    printDiff(Grav.F.potential_h,exact.data(),Grav.nx_local,Grav.ny_local,Grav.nz_local);
-    Get_Potential_SOR(Grav.Gconst,0,0,P);
+    printDiff(Grav.F.potential_h, exact.data(), Grav.nx_local, Grav.ny_local, Grav.nz_local);
+    Get_Potential_SOR(Grav.Gconst, 0, 0, P);
     chprintf(" SOR");
-    printDiff(Grav.F.potential_h,exact.data(),Grav.nx_local,Grav.ny_local,Grav.nz_local);
-    #endif
+    printDiff(Grav.F.potential_h, exact.data(), Grav.nx_local, Grav.ny_local, Grav.nz_local);
+  #endif
 
-    #ifdef SOR
+  #ifdef SOR
     chprintf(" Initializing disk analytic potential\n");
     #pragma omp parallel for
     for (int k = 0; k < nk; k++) {
-      const Real z = Grav.zMin+Grav.dz*(k+dr);
-      const int njk = nj*k;
+      const Real z  = Grav.zMin + Grav.dz * (k + dr);
+      const int njk = nj * k;
       for (int j = 0; j < nj; j++) {
-        const Real y = Grav.yMin+Grav.dy*(j+dr);
-        const Real yy = y*y;
-        const int nijk = ni*(j+njk);
+        const Real y   = Grav.yMin + Grav.dy * (j + dr);
+        const Real yy  = y * y;
+        const int nijk = ni * (j + njk);
         for (int i = 0; i < ni; i++) {
-          const Real x = Grav.xMin+Grav.dx*(i+dr);
-          const Real r = sqrt(x*x+yy);
-          const int ijk = i+nijk;
-          Grav.F.potential_h[ijk] = Galaxies::MW.phi_disk_D3D(r,z);
+          const Real x            = Grav.xMin + Grav.dx * (i + dr);
+          const Real r            = sqrt(x * x + yy);
+          const int ijk           = i + nijk;
+          Grav.F.potential_h[ijk] = galaxies::MW.phi_disk_D3D(r, z);
         }
       }
     }
-    #endif
+  #endif
   }
 }
 
-
-//Compute the Gravitational Potential by solving Poisson Equation
-void Grid3D::Compute_Gravitational_Potential( struct parameters *P ){
-
+// Compute the Gravitational Potential by solving Poisson Equation
+void Grid3D::Compute_Gravitational_Potential(struct Parameters *P)
+{
   #ifdef CPU_TIME
   Timer.Grav_Potential.Start();
   #endif
 
   #ifdef PARTICLES
-  //Copy the particles density to the grav_density array
-  Copy_Particles_Density_to_Gravity( *P );
+  // Copy the particles density to the grav_density array
+  Copy_Particles_Density_to_Gravity(*P);
   #endif
 
   #ifndef ONLY_PARTICLES
-  //Copy the hydro density to the grav_density array
+  // Copy the hydro density to the grav_density array
   Copy_Hydro_Density_to_Gravity();
   #endif
 
   #ifdef COSMOLOGY
-  //If using cosmology, set the gravitational constant to the one in the correct units
+  // If using cosmology, set the gravitational constant to the one in the
+  // correct units
   const Real Grav_Constant = Cosmo.cosmo_G;
-  const Real current_a = Cosmo.current_a;
-  const Real dens_avrg = Cosmo.rho_0_gas;
+  const Real current_a     = Cosmo.current_a;
+  const Real dens_avrg     = Cosmo.rho_0_gas;
   #else
   const Real Grav_Constant = Grav.Gconst;
   // If slowing the Sphere Collapse problem ( bc_potential_type=0 )
   const Real dens_avrg = (P->bc_potential_type == 0) ? H.sphere_background_density : 0;
-  const Real r0 = H.sphere_radius;
+  const Real r0        = H.sphere_radius;
   // Re-use current_a as the total mass of the sphere
-  const Real current_a = (H.sphere_density-dens_avrg)*4.0*M_PI*r0*r0*r0/3.0;
+  const Real current_a = (H.sphere_density - dens_avrg) * 4.0 * M_PI * r0 * r0 * r0 / 3.0;
   #endif
 
-  if ( !Grav.BC_FLAGS_SET ){
+  if (!Grav.BC_FLAGS_SET) {
     Grav.TRANSFER_POTENTIAL_BOUNDARIES = true;
-    Set_Boundary_Conditions( *P );
+    Set_Boundary_Conditions(*P);
     Grav.TRANSFER_POTENTIAL_BOUNDARIES = false;
     // #ifdef MPI_CHOLLA
-    // printf(" Pid: %d Gravity Boundary Flags: %d %d %d %d %d %d \n", procID, Grav.boundary_flags[0], Grav.boundary_flags[1], Grav.boundary_flags[2], Grav.boundary_flags[3], Grav.boundary_flags[4], Grav.boundary_flags[5] );
+    // printf(" Pid: %d Gravity Boundary Flags: %d %d %d %d %d %d \n", procID,
+    // Grav.boundary_flags[0], Grav.boundary_flags[1], Grav.boundary_flags[2],
+    // Grav.boundary_flags[3], Grav.boundary_flags[4], Grav.boundary_flags[5] );
     // #endif
     Grav.BC_FLAGS_SET = true;
   }
 
   #ifdef GRAV_ISOLATED_BOUNDARY_X
-  if ( Grav.boundary_flags[0] == 3 ) Compute_Potential_Boundaries_Isolated(0, P);
-  if ( Grav.boundary_flags[1] == 3 ) Compute_Potential_Boundaries_Isolated(1, P);
+  if (Grav.boundary_flags[0] == 3) {
+    Compute_Potential_Boundaries_Isolated(0, P);
+  }
+  if (Grav.boundary_flags[1] == 3) {
+    Compute_Potential_Boundaries_Isolated(1, P);
+  }
   // chprintf("Isolated X\n");
   #endif
   #ifdef GRAV_ISOLATED_BOUNDARY_Y
-  if ( Grav.boundary_flags[2] == 3 ) Compute_Potential_Boundaries_Isolated(2, P);
-  if ( Grav.boundary_flags[3] == 3 ) Compute_Potential_Boundaries_Isolated(3, P);
+  if (Grav.boundary_flags[2] == 3) {
+    Compute_Potential_Boundaries_Isolated(2, P);
+  }
+  if (Grav.boundary_flags[3] == 3) {
+    Compute_Potential_Boundaries_Isolated(3, P);
+  }
   // chprintf("Isolated Y\n");
   #endif
   #ifdef GRAV_ISOLATED_BOUNDARY_Z
-  if ( Grav.boundary_flags[4] == 3 ) Compute_Potential_Boundaries_Isolated(4, P);
-  if ( Grav.boundary_flags[5] == 3 ) Compute_Potential_Boundaries_Isolated(5, P);
+  if (Grav.boundary_flags[4] == 3) {
+    Compute_Potential_Boundaries_Isolated(4, P);
+  }
+  if (Grav.boundary_flags[5] == 3) {
+    Compute_Potential_Boundaries_Isolated(5, P);
+  }
   // chprintf("Isolated Z\n");
   #endif
 
-  //Solve Poisson Equation to compute the potential
-  //Poisson Equation: laplacian( phi ) = 4 * pi * G / scale_factor * ( dens - dens_average )
+  // Solve Poisson Equation to compute the potential
+  // Poisson Equation: laplacian( phi ) = 4 * pi * G / scale_factor * ( dens -
+  // dens_average )
   Real *input_density, *output_potential;
   #ifdef GRAVITY_GPU
-  input_density = Grav.F.density_d;
+  input_density    = Grav.F.density_d;
   output_potential = Grav.F.potential_d;
   #else
-  input_density = Grav.F.density_h;
+  input_density    = Grav.F.density_h;
   output_potential = Grav.F.potential_h;
   #endif
 
   #ifdef SOR
 
-  #ifdef PARIS_GALACTIC_TEST
-  #ifdef GRAVITY_GPU
-  #error "GRAVITY_GPU not yet supported with PARIS_GALACTIC_TEST"
-  #endif
-  Grav.Poisson_solver_test.Get_Potential(input_density,output_potential,Grav_Constant,Galaxies::MW);
-  std::vector<Real> p(output_potential,output_potential+Grav.n_cells_potential);
-  Get_Potential_SOR( Grav_Constant, dens_avrg, current_a, P );
+    #ifdef PARIS_GALACTIC_TEST
+      #ifdef GRAVITY_GPU
+        #error "GRAVITY_GPU not yet supported with PARIS_GALACTIC_TEST"
+      #endif
+  Grav.Poisson_solver_test.Get_Potential(input_density, output_potential, Grav_Constant, galaxies::MW);
+  std::vector<Real> p(output_potential, output_potential + Grav.n_cells_potential);
+  Get_Potential_SOR(Grav_Constant, dens_avrg, current_a, P);
   chprintf(" Paris vs SOR");
-  printDiff(p.data(),output_potential,Grav.nx_local,Grav.ny_local,Grav.nz_local,N_GHOST_POTENTIAL,false);
-  #else
-  Get_Potential_SOR( Grav_Constant, dens_avrg, current_a, P );
-  #endif
+  printDiff(p.data(), output_potential, Grav.nx_local, Grav.ny_local, Grav.nz_local, N_GHOST_POTENTIAL, false);
+    #else
+  Get_Potential_SOR(Grav_Constant, dens_avrg, current_a, P);
+    #endif
 
   #elif defined PARIS_GALACTIC
-  Grav.Poisson_solver.Get_Potential(input_density,output_potential,Grav_Constant,Galaxies::MW);
+  Grav.Poisson_solver.Get_Potential(input_density, output_potential, Grav_Constant, galaxies::MW);
   #else
-  Grav.Poisson_solver.Get_Potential( input_density, output_potential, Grav_Constant, dens_avrg, current_a);
-  #endif//SOR
+  Grav.Poisson_solver.Get_Potential(input_density, output_potential, Grav_Constant, dens_avrg, current_a);
+  #endif  // SOR
 
   #ifdef CPU_TIME
   Timer.Grav_Potential.End();
   #endif
-
 }
 
-#ifdef GRAVITY_ANALYTIC_COMP
-void Grid3D::Add_Analytic_Potential(struct parameters *P) {
-  #ifndef PARALLEL_OMP
-  Add_Analytic_Galaxy_Potential(0, Grav.nz_local, Galaxies::MW);
-  #else
-  #pragma omp parallel num_threads( N_OMP_THREADS )
+  #ifdef GRAVITY_ANALYTIC_COMP
+void Grid3D::Setup_Analytic_Potential(struct Parameters *P)
+{
+    #ifndef PARALLEL_OMP
+  Setup_Analytic_Galaxy_Potential(0, Grav.nz_local + 2 * N_GHOST_POTENTIAL, galaxies::MW);
+    #else
+      #pragma omp parallel num_threads(N_OMP_THREADS)
   {
     int omp_id, n_omp_procs;
     int g_start, g_end;
 
-    omp_id = omp_get_thread_num();
+    omp_id      = omp_get_thread_num();
     n_omp_procs = omp_get_num_threads();
-    Get_OMP_Grid_Indxs( Grav.nz_local, n_omp_procs, omp_id, &g_start, &g_end  );
+    Get_OMP_Grid_Indxs(Grav.nz_local + 2 * N_GHOST_POTENTIAL, n_omp_procs, omp_id, &g_start, &g_end);
 
-    Add_Analytic_Galaxy_Potential(g_start, g_end, Galaxies::MW);
+    Setup_Analytic_Galaxy_Potential(g_start, g_end, galaxies::MW);
   }
-  #endif
+    #endif
+
+    #ifdef GRAVITY_GPU
+  GPU_Error_Check(cudaMemcpy(Grav.F.analytic_potential_d, Grav.F.analytic_potential_h,
+                             Grav.n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice));
+    #endif
 }
-#endif
 
+void Grid3D::Add_Analytic_Potential()
+{
+    #ifdef GRAVITY_GPU
+  Add_Analytic_Potential_GPU();
+    #else
+      #ifndef PARALLEL_OMP
+  Add_Analytic_Potential(0, Grav.nz_local + 2 * N_GHOST_POTENTIAL);
+      #else
+        #pragma omp parallel num_threads(N_OMP_THREADS)
+  {
+    int omp_id, n_omp_procs;
+    int g_start, g_end;
 
-void Grid3D::Copy_Hydro_Density_to_Gravity_Function( int g_start, int g_end){
+    omp_id      = omp_get_thread_num();
+    n_omp_procs = omp_get_num_threads();
+    Get_OMP_Grid_Indxs(Grav.nz_local + 2 * N_GHOST_POTENTIAL, n_omp_procs, omp_id, &g_start, &g_end);
+
+    Add_Analytic_Potential(g_start, g_end);
+  }
+      #endif  // PARALLEL_OMP
+    #endif    // GRAVITY_GPU else
+}
+  #endif  // GRAVITY_ANALYTIC_COMP
+
+void Grid3D::Copy_Hydro_Density_to_Gravity_Function(int g_start, int g_end)
+{
   // Copy the density array from hydro conserved to gravity density array
 
   Real dens;
   int i, j, k, id, id_grav;
-  for (k=g_start; k<g_end; k++) {
-    for (j=0; j<Grav.ny_local; j++) {
-      for (i=0; i<Grav.nx_local; i++) {
-        id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-        id_grav = (i) + (j)*Grav.nx_local + (k)*Grav.nx_local*Grav.ny_local;
+  for (k = g_start; k < g_end; k++) {
+    for (j = 0; j < Grav.ny_local; j++) {
+      for (i = 0; i < Grav.nx_local; i++) {
+        id      = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny;
+        id_grav = (i) + (j)*Grav.nx_local + (k)*Grav.nx_local * Grav.ny_local;
 
         dens = C.density[id];
 
-        //If using cosmology the density must be rescaled to the physical coordinates
-        #ifdef COSMOLOGY
+  // If using cosmology the density must be rescaled to the physical coordinates
+  #ifdef COSMOLOGY
         dens *= Cosmo.rho_0_gas;
-        #endif
+  #endif
 
-        #ifdef PARTICLES
-        Grav.F.density_h[id_grav] += dens; //Hydro density is added AFTER partices density
-        #else
+  #ifdef PARTICLES
+        Grav.F.density_h[id_grav] += dens;  // Hydro density is added AFTER partices density
+  #else
         Grav.F.density_h[id_grav] = dens;
-        #endif
-
+  #endif
       }
     }
   }
 }
 
-void Grid3D::Copy_Hydro_Density_to_Gravity(){
-
+void Grid3D::Copy_Hydro_Density_to_Gravity()
+{
   #ifdef GRAVITY_GPU
   Copy_Hydro_Density_to_Gravity_GPU();
   #else
 
-  #ifndef PARALLEL_OMP
-  Copy_Hydro_Density_to_Gravity_Function( 0, Grav.nz_local );
-  #else
+    #ifndef PARALLEL_OMP
+  Copy_Hydro_Density_to_Gravity_Function(0, Grav.nz_local);
+    #else
 
-  #pragma omp parallel num_threads( N_OMP_THREADS )
+      #pragma omp parallel num_threads(N_OMP_THREADS)
   {
     int omp_id, n_omp_procs;
     int g_start, g_end;
 
-    omp_id = omp_get_thread_num();
+    omp_id      = omp_get_thread_num();
     n_omp_procs = omp_get_num_threads();
-    Get_OMP_Grid_Indxs( Grav.nz_local, n_omp_procs, omp_id, &g_start, &g_end  );
+    Get_OMP_Grid_Indxs(Grav.nz_local, n_omp_procs, omp_id, &g_start, &g_end);
 
-    Copy_Hydro_Density_to_Gravity_Function(g_start, g_end );
+    Copy_Hydro_Density_to_Gravity_Function(g_start, g_end);
   }
-  #endif //PARALLEL_OMP
-
-  #endif //GRAVITY_GPU
+    #endif  // PARALLEL_OMP
 
+  #endif  // GRAVITY_GPU
 }
 
+  #ifdef GRAVITY_ANALYTIC_COMP
+void Grid3D::Setup_Analytic_Galaxy_Potential(int g_start, int g_end, DiskGalaxy &gal)
+{
+  int nx = Grav.nx_local + 2 * N_GHOST_POTENTIAL;
+  int ny = Grav.ny_local + 2 * N_GHOST_POTENTIAL;
+  int nz = Grav.nz_local + 2 * N_GHOST_POTENTIAL;
+
+  // the fraction of the disk that's not modelled (and so its analytic
+  // contribution must be added)
+  Real non_mod_frac = 1 - SIMULATED_FRACTION;
+
+  int k, j, i, id;
+  Real x_pos, y_pos, z_pos, R;
+  for (k = g_start; k < g_end; k++) {
+    for (j = 0; j < ny; j++) {
+      for (i = 0; i < nx; i++) {
+        id                              = i + j * nx + k * nx * ny;
+        x_pos                           = Grav.xMin + Grav.dx * (i - N_GHOST_POTENTIAL) + 0.5 * Grav.dx;
+        y_pos                           = Grav.yMin + Grav.dy * (j - N_GHOST_POTENTIAL) + 0.5 * Grav.dy;
+        z_pos                           = Grav.zMin + Grav.dz * (k - N_GHOST_POTENTIAL) + 0.5 * Grav.dz;
+        R                               = sqrt(x_pos * x_pos + y_pos * y_pos);
+        Grav.F.analytic_potential_h[id] = non_mod_frac * gal.phi_disk_D3D(R, z_pos) + gal.phi_halo_D3D(R, z_pos);
+      }
+    }
+  }
+}
 
-#ifdef GRAVITY_ANALYTIC_COMP
 /**
- * Adds a specified potential function to the potential calculated from solving the Poisson equation.
- * The raison d'etre is to solve the evolution of a system where not all particles are simulated.
+ * Adds a specified potential function to the potential calculated from solving
+ * the Poisson equation. External grav potential not due to simulated matter.
  */
-void Grid3D::Add_Analytic_Galaxy_Potential(int g_start, int g_end, DiskGalaxy& gal) {
-  int nx = Grav.nx_local + 2*N_GHOST_POTENTIAL;
-  int ny = Grav.ny_local + 2*N_GHOST_POTENTIAL;
-  int nz = Grav.nz_local + 2*N_GHOST_POTENTIAL;
-
-  // the fraction of the disk that's not modelled (and so its analytic contribution must be added)
-  //Real non_mod_frac = 0.0;
+void Grid3D::Add_Analytic_Potential(int g_start, int g_end)
+{
+  int nx = Grav.nx_local + 2 * N_GHOST_POTENTIAL;
+  int ny = Grav.ny_local + 2 * N_GHOST_POTENTIAL;
+  int nz = Grav.nz_local + 2 * N_GHOST_POTENTIAL;
 
   int k, j, i, id;
   Real x_pos, y_pos, z_pos, R;
-  for ( k=g_start; k<g_end; k++ ){
-    for ( j=0; j<ny; j++ ){
-      for ( i=0; i<nx; i++ ){
-        id = i + j*nx + k*nx*ny;
-        // does this also work with MPI?  is Grav.xMin equivalent to H.xblocal, for example.
-        x_pos = Grav.xMin + Grav.dx*(i-N_GHOST_POTENTIAL) + 0.5*Grav.dx;
-        y_pos = Grav.yMin + Grav.dy*(j-N_GHOST_POTENTIAL) + 0.5*Grav.dy;
-        z_pos = Grav.zMin + Grav.dz*(k-N_GHOST_POTENTIAL) + 0.5*Grav.dz;
-        R = sqrt(x_pos*x_pos + y_pos*y_pos);
-        //Grav.F.potential_h[id] += non_mod_frac*gal.phi_disk_D3D(R, z_pos) + gal.phi_halo_D3D(R, z_pos);
-        Grav.F.potential_h[id] += gal.phi_halo_D3D(R, z_pos);
+  for (k = g_start; k < g_end; k++) {
+    for (j = 0; j < ny; j++) {
+      for (i = 0; i < nx; i++) {
+        id = i + j * nx + k * nx * ny;
+        Grav.F.potential_h[id] += Grav.F.analytic_potential_h[id];
       }
     }
   }
 }
-#endif
+  #endif  // GRAVITY_ANALYTIC_COMP
 
+// Extrapolate the potential to obtain phi_n+1/2
+void Grid3D::Extrapolate_Grav_Potential_Function(int g_start, int g_end)
+{
+  // Use phi_n-1 and phi_n to extrapolate the potential and obtain phi_n+1/2
 
-//Extrapolate the potential to obtain phi_n+1/2
-void Grid3D::Extrapolate_Grav_Potential_Function( int g_start, int g_end ){
-  //Use phi_n-1 and phi_n to extrapolate the potential and obtain phi_n+1/2
-
-  int nx_pot = Grav.nx_local + 2*N_GHOST_POTENTIAL;
-  int ny_pot = Grav.ny_local + 2*N_GHOST_POTENTIAL;
-  int nz_pot = Grav.nz_local + 2*N_GHOST_POTENTIAL;
+  int nx_pot = Grav.nx_local + 2 * N_GHOST_POTENTIAL;
+  int ny_pot = Grav.ny_local + 2 * N_GHOST_POTENTIAL;
+  int nz_pot = Grav.nz_local + 2 * N_GHOST_POTENTIAL;
 
   int n_ghost_grid, nx_grid, ny_grid, nz_grid;
   Real *potential_in, *potential_out;
 
-  //Input potential
+  // Input potential
   potential_in = Grav.F.potential_h;
 
-  //Output potential
+  // Output potential
   potential_out = C.Grav_potential;
-  //n_ghost for the output potential
+  // n_ghost for the output potential
   n_ghost_grid = H.n_ghost;
 
-  //Grid size for the output potential
-  nx_grid = Grav.nx_local + 2*n_ghost_grid;
-  ny_grid = Grav.ny_local + 2*n_ghost_grid;
-  nz_grid = Grav.nz_local + 2*n_ghost_grid;
+  // Grid size for the output potential
+  nx_grid = Grav.nx_local + 2 * n_ghost_grid;
+  ny_grid = Grav.ny_local + 2 * n_ghost_grid;
+  nz_grid = Grav.nz_local + 2 * n_ghost_grid;
 
   int nGHST = n_ghost_grid - N_GHOST_POTENTIAL;
   Real pot_now, pot_prev, pot_extrp;
   int k, j, i, id_pot, id_grid;
-  for ( k=g_start; k<g_end; k++ ){
-    for ( j=0; j<ny_pot; j++ ){
-      for ( i=0; i<nx_pot; i++ ){
-        id_pot = i + j*nx_pot + k*nx_pot*ny_pot;
-        id_grid = (i+nGHST) + (j+nGHST)*nx_grid + (k+nGHST)*nx_grid*ny_grid;
-        pot_now = potential_in[id_pot]; //Potential at the n-th timestep
-        if ( Grav.INITIAL ){
-          pot_extrp = pot_now; //The first timestep the extrapolated potential is phi_0
+  for (k = g_start; k < g_end; k++) {
+    for (j = 0; j < ny_pot; j++) {
+      for (i = 0; i < nx_pot; i++) {
+        id_pot  = i + j * nx_pot + k * nx_pot * ny_pot;
+        id_grid = (i + nGHST) + (j + nGHST) * nx_grid + (k + nGHST) * nx_grid * ny_grid;
+        pot_now = potential_in[id_pot];  // Potential at the n-th timestep
+        if (Grav.INITIAL) {
+          pot_extrp = pot_now;  // The first timestep the extrapolated potential
+                                // is phi_0
         } else {
-          pot_prev = Grav.F.potential_1_h[id_pot]; //Potential at the (n-1)-th timestep ( previous step )
-          //Compute the extrapolated potential from phi_n-1 and phi_n
-          pot_extrp = pot_now  + 0.5 * Grav.dt_now * ( pot_now - pot_prev  ) / Grav.dt_prev;
+          pot_prev = Grav.F.potential_1_h[id_pot];  // Potential at the (n-1)-th
+                                                    // timestep ( previous step )
+          // Compute the extrapolated potential from phi_n-1 and phi_n
+          pot_extrp = pot_now + 0.5 * Grav.dt_now * (pot_now - pot_prev) / Grav.dt_prev;
         }
 
-        #ifdef COSMOLOGY
-        //For cosmological simulation the potential is tranformed to 'comoving coordinates'
+  #ifdef COSMOLOGY
+        // For cosmological simulation the potential is tranformed to 'comoving
+        // coordinates'
         pot_extrp *= Cosmo.current_a * Cosmo.current_a / Cosmo.phi_0_gas;
-        #endif
+  #endif
 
-        //Save the extrapolated potential
+        // Save the extrapolated potential
         potential_out[id_grid] = pot_extrp;
-        //Set phi_n-1 = phi_n, to use it during the next step
+        // Set phi_n-1 = phi_n, to use it during the next step
         Grav.F.potential_1_h[id_pot] = pot_now;
       }
     }
   }
 }
 
-//Call the function to extrapolate the potential
-void Grid3D::Extrapolate_Grav_Potential(){
-
+// Call the function to extrapolate the potential
+void Grid3D::Extrapolate_Grav_Potential()
+{
   #ifdef GRAVITY_GPU
   Extrapolate_Grav_Potential_GPU();
   #else
 
-  #ifndef PARALLEL_OMP
-  Extrapolate_Grav_Potential_Function( 0, Grav.nz_local + 2*N_GHOST_POTENTIAL );
-  #else
+    #ifndef PARALLEL_OMP
+  Extrapolate_Grav_Potential_Function(0, Grav.nz_local + 2 * N_GHOST_POTENTIAL);
+    #else
 
-  #pragma omp parallel num_threads( N_OMP_THREADS )
+      #pragma omp parallel num_threads(N_OMP_THREADS)
   {
     int omp_id, n_omp_procs;
     int g_start, g_end;
 
-    omp_id = omp_get_thread_num();
+    omp_id      = omp_get_thread_num();
     n_omp_procs = omp_get_num_threads();
-    Get_OMP_Grid_Indxs( Grav.nz_local + 2*N_GHOST_POTENTIAL, n_omp_procs, omp_id,  &g_start, &g_end  );
+    Get_OMP_Grid_Indxs(Grav.nz_local + 2 * N_GHOST_POTENTIAL, n_omp_procs, omp_id, &g_start, &g_end);
 
-    Extrapolate_Grav_Potential_Function( g_start, g_end );
+    Extrapolate_Grav_Potential_Function(g_start, g_end);
   }
-  #endif // PARALLEL_OMP
+    #endif  // PARALLEL_OMP
 
-  #endif //GRAVITY_GPU
+  #endif  // GRAVITY_GPU
 
-  //After the first timestep the INITIAL flag is set to false, that way the potential is properly extrapolated afterwards
+  // After the first timestep the INITIAL flag is set to false, that way the
+  // potential is properly extrapolated afterwards
   Grav.INITIAL = false;
 }
 
-
-#endif //GRAVITY
+#endif  // GRAVITY
diff --git a/src/gravity/gravity_functions_gpu.cu b/src/gravity/gravity_functions_gpu.cu
index 51c9f0dbc..b92d19084 100644
--- a/src/gravity/gravity_functions_gpu.cu
+++ b/src/gravity/gravity_functions_gpu.cu
@@ -1,84 +1,92 @@
 #if defined(GRAVITY) && defined(GRAVITY_GPU)
 
-#include "../grid/grid3D.h"
-#include "../global/global.h"
-#include "../io/io.h"
-#include "../utils/error_handling.h"
-#include <cstring>
+  #include <cstring>
 
+  #include "../global/global.h"
+  #include "../grid/grid3D.h"
+  #include "../io/io.h"
+  #include "../utils/error_handling.h"
 
-void Grav3D::AllocateMemory_GPU(){
-
-  CudaSafeCall( cudaMalloc((void**)&F.density_d,  n_cells*sizeof(Real)) );
-  CudaSafeCall( cudaMalloc((void**)&F.potential_d,   n_cells_potential*sizeof(Real)) );
-  CudaSafeCall( cudaMalloc((void**)&F.potential_1_d, n_cells_potential*sizeof(Real)) );
+void Grav3D::AllocateMemory_GPU()
+{
+  GPU_Error_Check(cudaMalloc((void **)&F.density_d, n_cells * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc((void **)&F.potential_d, n_cells_potential * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc((void **)&F.potential_1_d, n_cells_potential * sizeof(Real)));
 
   #ifdef GRAVITY_GPU
 
-  #ifdef GRAV_ISOLATED_BOUNDARY_X
-  CudaSafeCall( cudaMalloc((void**)&F.pot_boundary_x0_d, N_GHOST_POTENTIAL*ny_local*nz_local*sizeof(Real)) );
-  CudaSafeCall( cudaMalloc((void**)&F.pot_boundary_x1_d, N_GHOST_POTENTIAL*ny_local*nz_local*sizeof(Real)) );
-  #endif
-  #ifdef GRAV_ISOLATED_BOUNDARY_Y
-  CudaSafeCall( cudaMalloc((void**)&F.pot_boundary_y0_d, N_GHOST_POTENTIAL*nx_local*nz_local*sizeof(Real)) );
-  CudaSafeCall( cudaMalloc((void**)&F.pot_boundary_y1_d, N_GHOST_POTENTIAL*nx_local*nz_local*sizeof(Real)) );
-  #endif
-  #ifdef GRAV_ISOLATED_BOUNDARY_Z
-  CudaSafeCall( cudaMalloc((void**)&F.pot_boundary_z0_d, N_GHOST_POTENTIAL*nx_local*ny_local*sizeof(Real)) );
-  CudaSafeCall( cudaMalloc((void**)&F.pot_boundary_z1_d, N_GHOST_POTENTIAL*nx_local*ny_local*sizeof(Real)) );
-  #endif
-
-  #endif//GRAVITY_GPU
-
-  chprintf( "Allocated Gravity GPU memory \n" );
+    #ifdef GRAVITY_ANALYTIC_COMP
+  GPU_Error_Check(cudaMalloc((void **)&F.analytic_potential_d, n_cells_potential * sizeof(Real)));
+    #endif
+
+    #ifdef GRAV_ISOLATED_BOUNDARY_X
+  GPU_Error_Check(cudaMalloc((void **)&F.pot_boundary_x0_d, N_GHOST_POTENTIAL * ny_local * nz_local * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc((void **)&F.pot_boundary_x1_d, N_GHOST_POTENTIAL * ny_local * nz_local * sizeof(Real)));
+    #endif
+    #ifdef GRAV_ISOLATED_BOUNDARY_Y
+  GPU_Error_Check(cudaMalloc((void **)&F.pot_boundary_y0_d, N_GHOST_POTENTIAL * nx_local * nz_local * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc((void **)&F.pot_boundary_y1_d, N_GHOST_POTENTIAL * nx_local * nz_local * sizeof(Real)));
+    #endif
+    #ifdef GRAV_ISOLATED_BOUNDARY_Z
+  GPU_Error_Check(cudaMalloc((void **)&F.pot_boundary_z0_d, N_GHOST_POTENTIAL * nx_local * ny_local * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc((void **)&F.pot_boundary_z1_d, N_GHOST_POTENTIAL * nx_local * ny_local * sizeof(Real)));
+    #endif
+
+  #endif  // GRAVITY_GPU
+
+  chprintf("Allocated Gravity GPU memory \n");
 }
 
-
-void Grav3D::FreeMemory_GPU(void){
-
-  cudaFree( F.density_d );
-  cudaFree( F.potential_d );
-  cudaFree( F.potential_1_d );
-
+void Grav3D::FreeMemory_GPU(void)
+{
+  cudaFree(F.density_d);
+  cudaFree(F.potential_d);
+  cudaFree(F.potential_1_d);
 
   #ifdef GRAVITY_GPU
 
-  #ifdef GRAV_ISOLATED_BOUNDARY_X
-  cudaFree( F.pot_boundary_x0_d);
-  cudaFree( F.pot_boundary_x1_d);
-  #endif
-  #ifdef GRAV_ISOLATED_BOUNDARY_Y
-  cudaFree( F.pot_boundary_y0_d);
-  cudaFree( F.pot_boundary_y1_d);
-  #endif
-  #ifdef GRAV_ISOLATED_BOUNDARY_Z
-  cudaFree( F.pot_boundary_z0_d);
-  cudaFree( F.pot_boundary_z1_d);
-  #endif
-
-  #endif //GRAVITY_GPU
-
+    #ifdef GRAVITY_ANALYTIC_COMP
+  cudaFree(F.analytic_potential_d);
+    #endif
+
+    #ifdef GRAV_ISOLATED_BOUNDARY_X
+  cudaFree(F.pot_boundary_x0_d);
+  cudaFree(F.pot_boundary_x1_d);
+    #endif
+    #ifdef GRAV_ISOLATED_BOUNDARY_Y
+  cudaFree(F.pot_boundary_y0_d);
+  cudaFree(F.pot_boundary_y1_d);
+    #endif
+    #ifdef GRAV_ISOLATED_BOUNDARY_Z
+  cudaFree(F.pot_boundary_z0_d);
+  cudaFree(F.pot_boundary_z1_d);
+    #endif
+
+  #endif  // GRAVITY_GPU
 }
 
-void __global__ Copy_Hydro_Density_to_Gravity_Kernel( Real *src_density_d, Real *dst_density_d, int nx_local, int ny_local, int nz_local, int n_ghost, Real cosmo_rho_0_gas   ){
-
+void __global__ Copy_Hydro_Density_to_Gravity_Kernel(Real *src_density_d, Real *dst_density_d, int nx_local,
+                                                     int ny_local, int nz_local, int n_ghost, Real cosmo_rho_0_gas)
+{
   int tid_x, tid_y, tid_z, tid_grid, tid_dens;
   tid_x = blockIdx.x * blockDim.x + threadIdx.x;
   tid_y = blockIdx.y * blockDim.y + threadIdx.y;
   tid_z = blockIdx.z * blockDim.z + threadIdx.z;
 
-  if (tid_x >= nx_local || tid_y >= ny_local || tid_z >= nz_local ) return;
+  if (tid_x >= nx_local || tid_y >= ny_local || tid_z >= nz_local) {
+    return;
+  }
 
-  tid_dens = tid_x + tid_y*nx_local + tid_z*nx_local*ny_local;
+  tid_dens = tid_x + tid_y * nx_local + tid_z * nx_local * ny_local;
 
   tid_x += n_ghost;
   tid_y += n_ghost;
   tid_z += n_ghost;
 
   int nx_grid, ny_grid;
-  nx_grid = nx_local + 2*n_ghost;
-  ny_grid = ny_local + 2*n_ghost;
-  tid_grid = tid_x + tid_y*nx_grid + tid_z*nx_grid*ny_grid;
+  nx_grid  = nx_local + 2 * n_ghost;
+  ny_grid  = ny_local + 2 * n_ghost;
+  tid_grid = tid_x + tid_y * nx_grid + tid_z * nx_grid * ny_grid;
 
   Real dens;
   dens = src_density_d[tid_grid];
@@ -88,27 +96,24 @@ void __global__ Copy_Hydro_Density_to_Gravity_Kernel( Real *src_density_d, Real
   #endif
 
   #ifdef PARTICLES
-  dst_density_d[tid_dens] += dens; //Hydro density is added AFTER partices density
+  dst_density_d[tid_dens] += dens;  // Hydro density is added AFTER partices density
   #else
-  dst_density_d[tid_dens]  = dens;
+  dst_density_d[tid_dens] = dens;
   #endif
-
 }
 
-void Grid3D::Copy_Hydro_Density_to_Gravity_GPU(){
-
+void Grid3D::Copy_Hydro_Density_to_Gravity_GPU()
+{
   int nx_local, ny_local, nz_local, n_ghost;
   nx_local = Grav.nx_local;
   ny_local = Grav.ny_local;
   nz_local = Grav.nz_local;
   n_ghost  = H.n_ghost;
 
-
-
   // set values for GPU kernels
-  int tpb_x = TPBX_GRAV;
-  int tpb_y = TPBY_GRAV;
-  int tpb_z = TPBZ_GRAV;
+  int tpb_x   = TPBX_GRAV;
+  int tpb_y   = TPBY_GRAV;
+  int tpb_z   = TPBZ_GRAV;
   int ngrid_x = (nx_local - 1) / tpb_x + 1;
   int ngrid_y = (ny_local - 1) / tpb_y + 1;
   int ngrid_z = (nz_local - 1) / tpb_z + 1;
@@ -125,70 +130,132 @@ void Grid3D::Copy_Hydro_Density_to_Gravity_GPU(){
   cosmo_rho_0_gas = 1.0;
   #endif
 
-  //Copy the density from the device array to the Poisson input density array
-  hipLaunchKernelGGL(Copy_Hydro_Density_to_Gravity_Kernel, dim3dGrid, dim3dBlock, 0, 0,  C.d_density, Grav.F.density_d, nx_local, ny_local, nz_local, n_ghost, cosmo_rho_0_gas);
+  // Copy the density from the device array to the Poisson input density array
+  hipLaunchKernelGGL(Copy_Hydro_Density_to_Gravity_Kernel, dim3dGrid, dim3dBlock, 0, 0, C.d_density, Grav.F.density_d,
+                     nx_local, ny_local, nz_local, n_ghost, cosmo_rho_0_gas);
+}
 
+  #if defined(GRAVITY_ANALYTIC_COMP)
+void __global__ Add_Analytic_Potential_Kernel(Real *analytic_d, Real *potential_d, int nx_pot, int ny_pot, int nz_pot)
+{
+  int tid_x, tid_y, tid_z, tid;
+  tid_x = blockIdx.x * blockDim.x + threadIdx.x;
+  tid_y = blockIdx.y * blockDim.y + threadIdx.y;
+  tid_z = blockIdx.z * blockDim.z + threadIdx.z;
 
+  if (tid_x >= nx_pot || tid_y >= ny_pot || tid_z >= nz_pot) {
+    return;
+  }
+
+  tid = tid_x + tid_y * nx_pot + tid_z * nx_pot * ny_pot;
+
+  potential_d[tid] += analytic_d[tid];
+  /*
+  if (tid_x < 10 && tid_y == (ny_pot/2) && tid_z == (nz_pot/2)) {
+    //printf("potential_d[%d, %d, %d] = %.4e\n", tid_x, tid_y, tid_z,
+  potential_d[tid]); printf("analytic_d[%d, %d, %d] = %.4e\n", tid_x, tid_y,
+  tid_z, analytic_d[tid]);
+  }
+  */
 }
 
-void __global__ Extrapolate_Grav_Potential_Kernel( Real *dst_potential, Real *src_potential_0, Real *src_potential_1,
-        int nx_pot, int ny_pot, int nz_pot, int nx_grid, int ny_grid, int nz_grid, int n_offset,
-        Real dt_now, Real dt_prev, bool INITIAL,  Real cosmo_factor ){
+void Grid3D::Add_Analytic_Potential_GPU()
+{
+  int nx_pot, ny_pot, nz_pot;
+  nx_pot = Grav.nx_local + 2 * N_GHOST_POTENTIAL;
+  ny_pot = Grav.ny_local + 2 * N_GHOST_POTENTIAL;
+  nz_pot = Grav.nz_local + 2 * N_GHOST_POTENTIAL;
+
+  // set values for GPU kernels
+  int tpb_x = TPBX_GRAV;
+  int tpb_y = TPBY_GRAV;
+  int tpb_z = TPBZ_GRAV;
+
+  int ngrid_x = (nx_pot - 1) / tpb_x + 1;
+  int ngrid_y = (ny_pot - 1) / tpb_y + 1;
+  int ngrid_z = (nz_pot - 1) / tpb_z + 1;
+
+  // number of blocks per 1D grid
+  dim3 dim3dGrid(ngrid_x, ngrid_y, ngrid_z);
+  //  number of threads per 1D block
+  dim3 dim3dBlock(tpb_x, tpb_y, tpb_z);
 
+  // Copy the analytic potential from the device array to the device potential
+  // array
+  hipLaunchKernelGGL(Add_Analytic_Potential_Kernel, dim3dGrid, dim3dBlock, 0, 0, Grav.F.analytic_potential_d,
+                     Grav.F.potential_d, nx_pot, ny_pot, nz_pot);
+  cudaDeviceSynchronize();
+  /*gpuFor(10,
+    GPU_LAMBDA(const int i) {
+        printf("potential_after_analytic[%d, %d, %d] = %.4e\n", i, ny_pot/2,
+  nz_pot/2, Grav.F.potential_d[i + nx_pot*ny_pot/2 + nx_pot*ny_pot*nz_pot/2]);
+    }
+  );*/
+}
+  #endif  // GRAVITY_ANALYTIC_COMP
+
+void __global__ Extrapolate_Grav_Potential_Kernel(Real *dst_potential, Real *src_potential_0, Real *src_potential_1,
+                                                  int nx_pot, int ny_pot, int nz_pot, int nx_grid, int ny_grid,
+                                                  int nz_grid, int n_offset, Real dt_now, Real dt_prev, bool INITIAL,
+                                                  Real cosmo_factor)
+{
   int tid_x, tid_y, tid_z, tid_grid, tid_pot;
   tid_x = blockIdx.x * blockDim.x + threadIdx.x;
   tid_y = blockIdx.y * blockDim.y + threadIdx.y;
   tid_z = blockIdx.z * blockDim.z + threadIdx.z;
 
-  if (tid_x >= nx_pot || tid_y >= ny_pot || tid_z >= nz_pot ) return;
+  if (tid_x >= nx_pot || tid_y >= ny_pot || tid_z >= nz_pot) {
+    return;
+  }
 
-  tid_pot = tid_x + tid_y*nx_pot + tid_z*nx_pot*ny_pot;
+  tid_pot = tid_x + tid_y * nx_pot + tid_z * nx_pot * ny_pot;
 
   tid_x += n_offset;
   tid_y += n_offset;
   tid_z += n_offset;
 
-  tid_grid = tid_x + tid_y*nx_grid + tid_z*nx_grid*ny_grid;
+  tid_grid = tid_x + tid_y * nx_grid + tid_z * nx_grid * ny_grid;
 
   Real pot_now, pot_prev, pot_extrp;
-  pot_now = src_potential_0[tid_pot]; //Potential at the n-th timestep
-  if ( INITIAL ){
-    pot_extrp = pot_now; //The first timestep the extrapolated potential is phi_0
+  pot_now = src_potential_0[tid_pot];  // Potential at the n-th timestep
+  if (INITIAL) {
+    pot_extrp = pot_now;  // The first timestep the extrapolated potential is phi_0
   } else {
-    pot_prev = src_potential_1[tid_pot]; //Potential at the (n-1)-th timestep ( previous step )
-    //Compute the extrapolated potential from phi_n-1 and phi_n
-    pot_extrp = pot_now  + 0.5 * dt_now * ( pot_now - pot_prev  ) / dt_prev;
+    pot_prev = src_potential_1[tid_pot];  // Potential at the (n-1)-th timestep
+                                          // ( previous step )
+    // Compute the extrapolated potential from phi_n-1 and phi_n
+    pot_extrp = pot_now + 0.5 * dt_now * (pot_now - pot_prev) / dt_prev;
   }
 
   #ifdef COSMOLOGY
-  //For cosmological simulation the potential is transformed to 'comoving coordinates'
+  // For cosmological simulation the potential is transformed to 'comoving
+  // coordinates'
   pot_extrp *= cosmo_factor;
   #endif
 
-  //Save the extrapolated potential
+  // Save the extrapolated potential
   dst_potential[tid_grid] = pot_extrp;
-  //Set phi_n-1 = phi_n, to use it during the next step
+  // Set phi_n-1 = phi_n, to use it during the next step
   src_potential_1[tid_pot] = pot_now;
 }
 
-void Grid3D::Extrapolate_Grav_Potential_GPU(){
-
+void Grid3D::Extrapolate_Grav_Potential_GPU()
+{
   int nx_pot, ny_pot, nz_pot;
-  nx_pot = Grav.nx_local + 2*N_GHOST_POTENTIAL;
-  ny_pot = Grav.ny_local + 2*N_GHOST_POTENTIAL;
-  nz_pot = Grav.nz_local + 2*N_GHOST_POTENTIAL;
+  nx_pot = Grav.nx_local + 2 * N_GHOST_POTENTIAL;
+  ny_pot = Grav.ny_local + 2 * N_GHOST_POTENTIAL;
+  nz_pot = Grav.nz_local + 2 * N_GHOST_POTENTIAL;
 
   int n_ghost_grid, nx_grid, ny_grid, nz_grid;
   n_ghost_grid = H.n_ghost;
-  nx_grid = Grav.nx_local + 2*n_ghost_grid;
-  ny_grid = Grav.ny_local + 2*n_ghost_grid;
-  nz_grid = Grav.nz_local + 2*n_ghost_grid;
+  nx_grid      = Grav.nx_local + 2 * n_ghost_grid;
+  ny_grid      = Grav.ny_local + 2 * n_ghost_grid;
+  nz_grid      = Grav.nz_local + 2 * n_ghost_grid;
 
   int n_offset = n_ghost_grid - N_GHOST_POTENTIAL;
 
-
   Real dt_now, dt_prev, cosmo_factor;
-  dt_now = Grav.dt_now;
+  dt_now  = Grav.dt_now;
   dt_prev = Grav.dt_prev;
 
   #ifdef COSMOLOGY
@@ -198,9 +265,9 @@ void Grid3D::Extrapolate_Grav_Potential_GPU(){
   #endif
 
   // set values for GPU kernels
-  int tpb_x = TPBX_GRAV;
-  int tpb_y = TPBY_GRAV;
-  int tpb_z = TPBZ_GRAV;
+  int tpb_x   = TPBX_GRAV;
+  int tpb_y   = TPBY_GRAV;
+  int tpb_z   = TPBZ_GRAV;
   int ngrid_x = (nx_pot - 1) / tpb_x + 1;
   int ngrid_y = (ny_pot - 1) / tpb_y + 1;
   int ngrid_z = (nz_pot - 1) / tpb_z + 1;
@@ -209,20 +276,18 @@ void Grid3D::Extrapolate_Grav_Potential_GPU(){
   //  number of threads per 1D block
   dim3 dim3dBlock(tpb_x, tpb_y, tpb_z);
 
-  hipLaunchKernelGGL(Extrapolate_Grav_Potential_Kernel, dim3dGrid, dim3dBlock, 0, 0, C.d_Grav_potential, Grav.F.potential_d, Grav.F.potential_1_d, nx_pot, ny_pot, nz_pot, nx_grid, ny_grid, nz_grid, n_offset, dt_now, dt_prev, Grav.INITIAL, cosmo_factor );
-
+  hipLaunchKernelGGL(Extrapolate_Grav_Potential_Kernel, dim3dGrid, dim3dBlock, 0, 0, C.d_Grav_potential,
+                     Grav.F.potential_d, Grav.F.potential_1_d, nx_pot, ny_pot, nz_pot, nx_grid, ny_grid, nz_grid,
+                     n_offset, dt_now, dt_prev, Grav.INITIAL, cosmo_factor);
 }
 
-#ifdef PARTICLES_CPU
-void Grid3D::Copy_Potential_From_GPU(){
-  CudaSafeCall( cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, Grav.n_cells_potential*sizeof(Real), cudaMemcpyDeviceToHost) );
+  #ifdef PARTICLES_CPU
+void Grid3D::Copy_Potential_From_GPU()
+{
+  GPU_Error_Check(cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, Grav.n_cells_potential * sizeof(Real),
+                             cudaMemcpyDeviceToHost));
   cudaDeviceSynchronize();
 }
-#endif //PARTICLES_CPU
-
-
-
-
-
+  #endif  // PARTICLES_CPU
 
-#endif //GRAVITY
+#endif  // GRAVITY
diff --git a/src/gravity/gravity_restart.cpp b/src/gravity/gravity_restart.cpp
new file mode 100644
index 000000000..d2a09e24d
--- /dev/null
+++ b/src/gravity/gravity_restart.cpp
@@ -0,0 +1,102 @@
+// Special functions needed to make restart (init=Read_Grid) consistent with
+// running continuously
+
+#include <cstdio>
+
+#ifdef GRAVITY
+  #include "../gravity/grav3D.h"
+  #include "../io/io.h"
+#endif
+
+#ifdef MPI_CHOLLA
+// provides procID
+  #include "../mpi/mpi_routines.h"
+#endif  // MPI_CHOLLA
+
+#ifdef HDF5
+  #include <hdf5.h>
+#endif
+
+void Gravity_Restart_Filename(char* filename, char* dirname, int nfile)
+{
+#ifdef MPI_CHOLLA
+  sprintf(filename, "%s%d_gravity.h5.%d", dirname, nfile, procID);
+#else
+  sprintf(filename, "%s%d_gravity.h5", dirname, nfile);
+#endif
+}
+
+#if defined(GRAVITY) && defined(HDF5)
+void Grav3D::Read_Restart_HDF5(struct Parameters* P, int nfile)
+{
+  H5open();
+  char filename[MAXLEN];
+  Gravity_Restart_Filename(filename, P->indir, nfile);
+  hid_t file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT);
+
+  // Read dt_now
+  hid_t attribute_id = H5Aopen(file_id, "dt_now", H5P_DEFAULT);
+  herr_t status      = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &dt_now);
+  status             = H5Aclose(attribute_id);
+
+  // Read potential and copy to device to be used as potential n-1
+  Read_HDF5_Dataset(file_id, F.potential_1_h, "/potential");
+  #ifdef GRAVITY_GPU
+  GPU_Error_Check(
+      cudaMemcpy(F.potential_1_d, F.potential_1_h, n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice));
+  #endif
+
+  H5Fclose(file_id);
+  H5close();
+
+  // Set INITIAL to false
+  INITIAL = false;
+}
+
+void Grav3D::Write_Restart_HDF5(struct Parameters* P, int nfile)
+{
+  H5open();
+  std::string filename = FnameTemplate(*P).format_fname(nfile, "_gravity");
+  hid_t file_id        = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
+
+  // Write dt_now
+  hsize_t attr_dims  = 1;
+  hid_t dataspace_id = H5Screate_simple(1, &attr_dims, NULL);
+
+  hid_t attribute_id = H5Acreate(file_id, "dt_now", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
+  herr_t status      = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &dt_now);
+  status             = H5Aclose(attribute_id);
+
+  status = H5Sclose(dataspace_id);
+
+  // Copy device to host if needed
+  #ifdef GRAVITY_GPU
+  GPU_Error_Check(
+      cudaMemcpy(F.potential_1_h, F.potential_1_d, n_cells_potential * sizeof(Real), cudaMemcpyDeviceToHost));
+  #endif
+
+  // Write potential
+  hsize_t dims[1];
+  dims[0] = n_cells_potential;
+
+  dataspace_id = H5Screate_simple(1, dims, NULL);
+  Write_HDF5_Dataset(file_id, dataspace_id, F.potential_1_h, "/potential");
+  H5Sclose(dataspace_id);
+
+  H5Fclose(file_id);
+
+  H5close();
+}
+
+#elif defined(GRAVITY)
+// Do nothing
+void Grav3D::Read_Restart_HDF5(struct Parameters* P, int nfile)
+{
+  chprintf("WARNING from file %s line %d: Read_Restart_HDF5 did nothing", __FILE__, __LINE__);
+}
+
+void Grav3D::Write_Restart_HDF5(struct Parameters* P, int nfile)
+{
+  chprintf("WARNING from file %s line %d: Write_Restart_HDF5 did nothing", __FILE__, __LINE__);
+}
+#endif
diff --git a/src/gravity/paris/HenryPeriodic.cu b/src/gravity/paris/HenryPeriodic.cu
index cf82c2d38..1602ca737 100644
--- a/src/gravity/paris/HenryPeriodic.cu
+++ b/src/gravity/paris/HenryPeriodic.cu
@@ -1,103 +1,103 @@
 #ifdef PARIS
 
-#include "HenryPeriodic.hpp"
+  #include <algorithm>
+  #include <cassert>
+  #include <climits>
+  #include <cmath>
 
-#include <algorithm>
-#include <cassert>
-#include <climits>
-#include <cmath>
+  #include "HenryPeriodic.hpp"
 
-HenryPeriodic::HenryPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]):
-  idi_(id[0]),
-  idj_(id[1]),
-  idk_(id[2]),
-  mi_(m[0]),
-  mj_(m[1]),
-  mk_(m[2]),
-  nh_(n[2]/2+1),
-  ni_(n[0]),
-  nj_(n[1]),
-  nk_(n[2]),
-  bytes_(0)
+HenryPeriodic::HenryPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3])
+    : idi_(id[0]),
+      idj_(id[1]),
+      idk_(id[2]),
+      mi_(m[0]),
+      mj_(m[1]),
+      mk_(m[2]),
+      nh_(n[2] / 2 + 1),
+      ni_(n[0]),
+      nj_(n[1]),
+      nk_(n[2]),
+      bytes_(0)
 {
   // Pencil sub-decomposition within a 3D block
   mq_ = int(round(sqrt(mk_)));
-  while (mk_%mq_) mq_--;
-  mp_ = mk_/mq_;
-  assert(mp_*mq_ == mk_);
+  while (mk_ % mq_) {
+    mq_--;
+  }
+  mp_ = mk_ / mq_;
+  assert(mp_ * mq_ == mk_);
 
-  idp_ = idk_/mq_;
-  idq_ = idk_%mq_;
+  idp_ = idk_ / mq_;
+  idq_ = idk_ % mq_;
 
   // Communicators of tasks within pencils in each dimension
   {
-    const int color = idi_*mj_+idj_;
-    const int key = idk_;
-    MPI_Comm_split(MPI_COMM_WORLD,color,key,&commK_);
+    const int color = idi_ * mj_ + idj_;
+    const int key   = idk_;
+    MPI_Comm_split(MPI_COMM_WORLD, color, key, &commK_);
   }
   {
-    const int color = idi_*mp_+idp_;
-    const int key = idj_*mq_+idq_;
-    MPI_Comm_split(MPI_COMM_WORLD,color,key,&commJ_);
+    const int color = idi_ * mp_ + idp_;
+    const int key   = idj_ * mq_ + idq_;
+    MPI_Comm_split(MPI_COMM_WORLD, color, key, &commJ_);
   }
   {
-    const int color = idj_*mq_+idq_;
-    const int key = idi_*mp_+idp_;
-    MPI_Comm_split(MPI_COMM_WORLD,color,key,&commI_);
+    const int color = idj_ * mq_ + idq_;
+    const int key   = idi_ * mp_ + idp_;
+    MPI_Comm_split(MPI_COMM_WORLD, color, key, &commI_);
   }
 
   // Maximum numbers of elements for various decompositions and dimensions
-  
-  dh_ = (nh_+mk_-1)/mk_;
-  di_ = (ni_+mi_-1)/mi_;
-  dj_ = (nj_+mj_-1)/mj_;
-  dk_ = (nk_+mk_-1)/mk_;
 
-  dip_ = (di_+mp_-1)/mp_;
-  djq_ = (dj_+mq_-1)/mq_;
-  const int mjq = mj_*mq_;
-  dhq_ = (nh_+mjq-1)/mjq;
-  const int mip = mi_*mp_;
-  djp_ = (nj_+mip-1)/mip;
+  dh_ = (nh_ + mk_ - 1) / mk_;
+  di_ = (ni_ + mi_ - 1) / mi_;
+  dj_ = (nj_ + mj_ - 1) / mj_;
+  dk_ = (nk_ + mk_ - 1) / mk_;
+
+  dip_          = (di_ + mp_ - 1) / mp_;
+  djq_          = (dj_ + mq_ - 1) / mq_;
+  const int mjq = mj_ * mq_;
+  dhq_          = (nh_ + mjq - 1) / mjq;
+  const int mip = mi_ * mp_;
+  djp_          = (nj_ + mip - 1) / mip;
 
   // Maximum memory needed by work arrays
-  
-  const long nMax = std::max(
-    { long(di_)*long(dj_)*long(dk_),
-      long(mp_)*long(mq_)*long(dip_)*long(djq_)*long(dk_),
-      long(2)*long(dip_)*long(djq_)*long(mk_)*long(dh_),
-      long(2)*long(dip_)*long(mp_)*long(djq_)*long(mq_)*long(dh_),
-      long(2)*long(dip_)*long(djq_)*long(mjq)*long(dhq_),
-      long(2)*long(dip_)*long(dhq_)*long(mip)*long(djp_),
-      long(2)*djp_*long(dhq_)*long(mip)*long(dip_)
-    });
+
+  const long nMax =
+      std::max({long(di_) * long(dj_) * long(dk_), long(mp_) * long(mq_) * long(dip_) * long(djq_) * long(dk_),
+                long(2) * long(dip_) * long(djq_) * long(mk_) * long(dh_),
+                long(2) * long(dip_) * long(mp_) * long(djq_) * long(mq_) * long(dh_),
+                long(2) * long(dip_) * long(djq_) * long(mjq) * long(dhq_),
+                long(2) * long(dip_) * long(dhq_) * long(mip) * long(djp_),
+                long(2) * djp_ * long(dhq_) * long(mip) * long(dip_)});
   assert(nMax <= INT_MAX);
-  bytes_ = nMax*sizeof(double);
+  bytes_ = nMax * sizeof(double);
 
   // FFT objects
-  CHECK(cufftPlanMany(&c2ci_,1,&ni_,&ni_,1,ni_,&ni_,1,ni_,CUFFT_Z2Z,djp_*dhq_));
-  CHECK(cufftPlanMany(&c2cj_,1,&nj_,&nj_,1,nj_,&nj_,1,nj_,CUFFT_Z2Z,dip_*dhq_));
-  CHECK(cufftPlanMany(&c2rk_,1,&nk_,&nh_,1,nh_,&nk_,1,nk_,CUFFT_Z2D,dip_*djq_));
-  CHECK(cufftPlanMany(&r2ck_,1,&nk_,&nk_,1,nk_,&nh_,1,nh_,CUFFT_D2Z,dip_*djq_));
+  GPU_Error_Check(cufftPlanMany(&c2ci_, 1, &ni_, &ni_, 1, ni_, &ni_, 1, ni_, CUFFT_Z2Z, djp_ * dhq_));
+  GPU_Error_Check(cufftPlanMany(&c2cj_, 1, &nj_, &nj_, 1, nj_, &nj_, 1, nj_, CUFFT_Z2Z, dip_ * dhq_));
+  GPU_Error_Check(cufftPlanMany(&c2rk_, 1, &nk_, &nh_, 1, nh_, &nk_, 1, nk_, CUFFT_Z2D, dip_ * djq_));
+  GPU_Error_Check(cufftPlanMany(&r2ck_, 1, &nk_, &nk_, 1, nk_, &nh_, 1, nh_, CUFFT_D2Z, dip_ * djq_));
 
-#ifndef MPI_GPU
+  #ifndef MPI_GPU
   // Host arrays for MPI communication
-  CHECK(cudaHostAlloc(&ha_,bytes_+bytes_,cudaHostAllocDefault));
+  GPU_Error_Check(cudaHostAlloc(&ha_, bytes_ + bytes_, cudaHostAllocDefault));
   assert(ha_);
-  hb_ = ha_+nMax;
-#endif
+  hb_ = ha_ + nMax;
+  #endif
 }
 
 HenryPeriodic::~HenryPeriodic()
 {
-#ifndef MPI_GPU
-  CHECK(cudaFreeHost(ha_));
+  #ifndef MPI_GPU
+  GPU_Error_Check(cudaFreeHost(ha_));
   ha_ = hb_ = nullptr;
-#endif
-  CHECK(cufftDestroy(r2ck_));
-  CHECK(cufftDestroy(c2rk_));
-  CHECK(cufftDestroy(c2cj_));
-  CHECK(cufftDestroy(c2ci_));
+  #endif
+  GPU_Error_Check(cufftDestroy(r2ck_));
+  GPU_Error_Check(cufftDestroy(c2rk_));
+  GPU_Error_Check(cufftDestroy(c2cj_));
+  GPU_Error_Check(cufftDestroy(c2ci_));
   MPI_Comm_free(&commI_);
   MPI_Comm_free(&commJ_);
   MPI_Comm_free(&commK_);
diff --git a/src/gravity/paris/HenryPeriodic.hpp b/src/gravity/paris/HenryPeriodic.hpp
index ab56fde79..0441d5487 100644
--- a/src/gravity/paris/HenryPeriodic.hpp
+++ b/src/gravity/paris/HenryPeriodic.hpp
@@ -1,73 +1,83 @@
 #pragma once
 
-#include <algorithm>
 #include <mpi.h>
 
+#include <algorithm>
+
 #include "../../utils/gpu.hpp"
 
 /**
  * @brief Generic distributed-memory 3D FFT filter.
  */
-class HenryPeriodic {
-  public:
-
-    /**
-     * @param[in] n[3] { Global number of cells in each dimension, without ghost cells. }
-     * @param[in] lo[3] { Physical location of the global lower bound of each dimension. }
-     * @param[in] hi[3] { Physical location of the global upper bound of each dimension, minus one grid cell. 
-     *                     The one-cell difference is because of the periodic domain.
-     *                     See @ref Potential_Paris_3D::Initialize for an example computation of these arguments. }
-     * @param[in] m[3] { Number of MPI tasks in each dimension. }
-     * @param[in] id[3] { Coordinates of this MPI task, starting at `{0,0,0}`. }
-     */
-    HenryPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]);
-
-    ~HenryPeriodic();
-
-    /**
-     * @return { Number of bytes needed for array arguments for @ref filter. }
-     */
-    size_t bytes() const { return bytes_; }
-
-    /**
-     * @detail { Performs a 3D FFT on the real input field,
-     *           applies the provided filter in frequency space,
-     *           and perform the inverse 3D FFT. 
-     *           Expects fields in 3D block distribution with no ghost cells. }
-     * @tparam F { Type of functor that will applied in frequency space.
-     *             Should be resolved implicitly by the compiler. }
-     * @param[in] bytes { Number of bytes allocated for arguments @ref before and @ref after.
-     *                    Used to ensure that the arrays have enough extra work space. }
-     * @param[in,out] before { Input field for filtering. Modified as a work array.
-     *                         Must be at least @ref bytes() bytes, likely larger than the original field. }
-     * @param[out] after { Output field, filtered. Modified as a work array.
-     *                     Must be at least @ref bytes() bytes, likely larger than the actual output field. }
-     * @param[in] f { Functor or lambda function to be used as a filter.
-     *                The operator should have the following prototype.
-     *                \code
-     *                complex f(int i, int j, int k, complex before)
-     *                \endcode
-     *                Arguments `i`, `j`, and `k` are the frequency-space coordinates.
-     *                Argument `before` is the input value at those indices, after the FFT.
-     *                The function should return the filtered value. }
-     */
-    template <typename F>
-    void filter(const size_t bytes, double *const before, double *const after, const F f) const;
-
-  private:
-    int idi_,idj_,idk_; //!< MPI coordinates of 3D block
-    int mi_,mj_,mk_; //!< Number of MPI tasks in each dimension of 3D domain
-    int nh_; //!< Global number of complex values in Z dimension, after R2C transform
-    int ni_,nj_,nk_; //!< Global number of real points in each dimension
-    int mp_,mq_; //!< Number of MPI tasks in X and Y dimensions of Z pencil
-    int idp_,idq_; //!< X and Y task IDs within Z pencil
-    MPI_Comm commI_,commJ_,commK_; //!< Communicators of fellow tasks in X, Y, and Z pencils
-    int dh_,di_,dj_,dk_; //!< Max number of local points in each dimension
-    int dhq_,dip_,djp_,djq_; //!< Max number of local points in dimensions of 2D decompositions
-    size_t bytes_; //!< Max bytes needed for argument arrays
-    cufftHandle c2ci_,c2cj_,c2rk_,r2ck_; //!< Objects for forward and inverse FFTs
+class HenryPeriodic
+{
+ public:
+  /**
+   * @param[in] n[3] { Global number of cells in each dimension, without ghost
+   * cells. }
+   * @param[in] lo[3] { Physical location of the global lower bound of each
+   * dimension. }
+   * @param[in] hi[3] { Physical location of the global upper bound of each
+   * dimension, minus one grid cell. The one-cell difference is because of the
+   * periodic domain. See @ref PotentialParis3D::Initialize for an example
+   * computation of these arguments. }
+   * @param[in] m[3] { Number of MPI tasks in each dimension. }
+   * @param[in] id[3] { Coordinates of this MPI task, starting at `{0,0,0}`. }
+   */
+  HenryPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]);
+
+  ~HenryPeriodic();
+
+  /**
+   * @return { Number of bytes needed for array arguments for @ref filter. }
+   */
+  size_t bytes() const { return bytes_; }
+
+  /**
+   * @detail { Performs a 3D FFT on the real input field,
+   *           applies the provided filter in frequency space,
+   *           and perform the inverse 3D FFT.
+   *           Expects fields in 3D block distribution with no ghost cells. }
+   * @tparam F { Type of functor that will applied in frequency space.
+   *             Should be resolved implicitly by the compiler. }
+   * @param[in] bytes { Number of bytes allocated for arguments @ref before and
+   * @ref after. Used to ensure that the arrays have enough extra work space. }
+   * @param[in,out] before { Input field for filtering. Modified as a work
+   * array. Must be at least @ref bytes() bytes, likely larger than the original
+   * field. }
+   * @param[out] after { Output field, filtered. Modified as a work array.
+   *                     Must be at least @ref bytes() bytes, likely larger than
+   * the actual output field. }
+   * @param[in] f { Functor or lambda function to be used as a filter.
+   *                The operator should have the following prototype.
+   *                \code
+   *                complex f(int i, int j, int k, complex before)
+   *                \endcode
+   *                Arguments `i`, `j`, and `k` are the frequency-space
+   * coordinates. Argument `before` is the input value at those indices, after
+   * the FFT. The function should return the filtered value. }
+   */
+  template <typename F>
+  void filter(const size_t bytes, double *const before, double *const after, const F f) const;
+
+ private:
+  int idi_, idj_, idk_;  //!< MPI coordinates of 3D block
+  int mi_, mj_, mk_;     //!< Number of MPI tasks in each dimension of 3D domain
+  int nh_;               //!< Global number of complex values in Z dimension, after R2C
+                         //!< transform
+  int ni_, nj_, nk_;     //!< Global number of real points in each dimension
+  int mp_, mq_;          //!< Number of MPI tasks in X and Y dimensions of Z pencil
+  int idp_, idq_;        //!< X and Y task IDs within Z pencil
+  MPI_Comm commI_, commJ_,
+      commK_;              //!< Communicators of fellow tasks in X, Y, and Z pencils
+  int dh_, di_, dj_, dk_;  //!< Max number of local points in each dimension
+  int dhq_, dip_, djp_,
+      djq_;       //!< Max number of local points in dimensions of 2D decompositions
+  size_t bytes_;  //!< Max bytes needed for argument arrays
+  cufftHandle c2ci_, c2cj_, c2rk_,
+      r2ck_;  //!< Objects for forward and inverse FFTs
 #ifndef MPI_GPU
-    double *ha_, *hb_; //!< Host copies for MPI messages
+  double *ha_, *hb_;  //!< Host copies for MPI messages
 #endif
 };
 
@@ -79,10 +89,10 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, double *con
   // Make sure arguments have enough space
   assert(bytes >= bytes_);
 
-  double *const a = after;
-  double *const b = before;
-  cufftDoubleComplex *const ac = reinterpret_cast<cufftDoubleComplex*>(a);
-  cufftDoubleComplex *const bc = reinterpret_cast<cufftDoubleComplex*>(b);
+  double *const a              = after;
+  double *const b              = before;
+  cufftDoubleComplex *const ac = reinterpret_cast<cufftDoubleComplex *>(a);
+  cufftDoubleComplex *const bc = reinterpret_cast<cufftDoubleComplex *>(b);
 
   // Local copies of member variables for lambda capture
 
@@ -96,323 +106,309 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, double *con
 
   // Indices and sizes for pencil redistributions
 
-  const int idip = idi*mp+idp;
-  const int idjq = idj*mq+idq;
-  const int mip = mi*mp;
-  const int mjq = mj*mq;
+  const int idip = idi * mp + idp;
+  const int idjq = idj * mq + idq;
+  const int mip  = mi * mp;
+  const int mjq  = mj * mq;
 
   // Reorder 3D block into sub-pencils
 
   gpuFor(
-    mp,mq,dip,djq,dk,
-    GPU_LAMBDA(const int p, const int q, const int i, const int j, const int k) {
-      const int ii = p*dip+i; 
-      const int jj = q*djq+j;
-      const int ia = k+dk*(j+djq*(i+dip*(q+mq*p)));
-      const int ib = k+dk*(jj+dj*ii);
-      a[ia] = b[ib];
-    });
+      mp, mq, dip, djq, dk, GPU_LAMBDA(const int p, const int q, const int i, const int j, const int k) {
+        const int ii = p * dip + i;
+        const int jj = q * djq + j;
+        const int ia = k + dk * (j + djq * (i + dip * (q + mq * p)));
+        const int ib = k + dk * (jj + dj * ii);
+        a[ia]        = b[ib];
+      });
 
   // Redistribute into Z pencils
 
-  const int countK = dip*djq*dk;
-#ifndef MPI_GPU
-  CHECK(cudaMemcpy(ha_,a,bytes,cudaMemcpyDeviceToHost));
-  MPI_Alltoall(ha_,countK,MPI_DOUBLE,hb_,countK,MPI_DOUBLE,commK_);
-  CHECK(cudaMemcpy(b,hb_,bytes,cudaMemcpyHostToDevice));
-#else
-  CHECK(cudaDeviceSynchronize());
-  MPI_Alltoall(a,countK,MPI_DOUBLE,b,countK,MPI_DOUBLE,commK_);
-#endif
+  const int countK = dip * djq * dk;
+  #ifndef MPI_GPU
+  GPU_Error_Check(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost));
+  MPI_Alltoall(ha_, countK, MPI_DOUBLE, hb_, countK, MPI_DOUBLE, commK_);
+  GPU_Error_Check(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice));
+  #else
+  GPU_Error_Check(cudaDeviceSynchronize());
+  MPI_Alltoall(a, countK, MPI_DOUBLE, b, countK, MPI_DOUBLE, commK_);
+  #endif
 
   // Make Z pencils contiguous in Z
   {
-    const int iLo = idi*di+idp*dip;
-    const int iHi = std::min({iLo+dip,(idi+1)*di,ni});
-    const int jLo = idj*dj+idq*djq;
-    const int jHi = std::min({jLo+djq,(idj+1)*dj,nj});
+    const int iLo = idi * di + idp * dip;
+    const int iHi = std::min({iLo + dip, (idi + 1) * di, ni});
+    const int jLo = idj * dj + idq * djq;
+    const int jHi = std::min({jLo + djq, (idj + 1) * dj, nj});
     gpuFor(
-      iHi-iLo,jHi-jLo,mk,dk,
-      GPU_LAMBDA(const int i, const int j, const int pq, const int k) {
-        const int kk = pq*dk+k;
-        if (kk < nk) {
-          const int ia = kk+nk*(j+djq*i);
-          const int ib = k+dk*(j+djq*(i+dip*pq));
-          a[ia] = b[ib];
-        }
-      });
+        iHi - iLo, jHi - jLo, mk, dk, GPU_LAMBDA(const int i, const int j, const int pq, const int k) {
+          const int kk = pq * dk + k;
+          if (kk < nk) {
+            const int ia = kk + nk * (j + djq * i);
+            const int ib = k + dk * (j + djq * (i + dip * pq));
+            a[ia]        = b[ib];
+          }
+        });
   }
 
   // Real-to-complex FFT in Z
-  CHECK(cufftExecD2Z(r2ck_,a,bc));
+  GPU_Error_Check(cufftExecD2Z(r2ck_, a, bc));
 
   // Rearrange for Y redistribution
   {
-    const int iLo = idi*di+idp*dip;
-    const int iHi = std::min({iLo+dip,(idi+1)*di,ni});
-    const int jLo = idj_*dj_+idq*djq;
-    const int jHi = std::min({jLo+djq,(idj+1)*dj,nj});
+    const int iLo = idi * di + idp * dip;
+    const int iHi = std::min({iLo + dip, (idi + 1) * di, ni});
+    const int jLo = idj_ * dj_ + idq * djq;
+    const int jHi = std::min({jLo + djq, (idj + 1) * dj, nj});
     gpuFor(
-      mjq,iHi-iLo,jHi-jLo,dhq,
-      GPU_LAMBDA(const int q, const int i, const int j, const int k) {
-        const int kk = q*dhq+k;
-        if (kk < nh) {
-          const int ia = k+dhq*(j+djq*(i+dip*q));
-          const int ib = kk+nh*(j+djq*i);
-          ac[ia] = bc[ib];
-        }
-      });
+        mjq, iHi - iLo, jHi - jLo, dhq, GPU_LAMBDA(const int q, const int i, const int j, const int k) {
+          const int kk = q * dhq + k;
+          if (kk < nh) {
+            const int ia = k + dhq * (j + djq * (i + dip * q));
+            const int ib = kk + nh * (j + djq * i);
+            ac[ia]       = bc[ib];
+          }
+        });
   }
 
   // Redistribute for Y pencils
-  const int countJ = 2*dip*djq*dhq;
-#ifndef MPI_GPU
-  CHECK(cudaMemcpy(ha_,a,bytes,cudaMemcpyDeviceToHost));
-  MPI_Alltoall(ha_,countJ,MPI_DOUBLE,hb_,countJ,MPI_DOUBLE,commJ_);
-  CHECK(cudaMemcpy(b,hb_,bytes,cudaMemcpyHostToDevice));
-#else
-  CHECK(cudaDeviceSynchronize());
-  MPI_Alltoall(a,countJ,MPI_DOUBLE,b,countJ,MPI_DOUBLE,commJ_);
-#endif
+  const int countJ = 2 * dip * djq * dhq;
+  #ifndef MPI_GPU
+  GPU_Error_Check(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost));
+  MPI_Alltoall(ha_, countJ, MPI_DOUBLE, hb_, countJ, MPI_DOUBLE, commJ_);
+  GPU_Error_Check(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice));
+  #else
+  GPU_Error_Check(cudaDeviceSynchronize());
+  MPI_Alltoall(a, countJ, MPI_DOUBLE, b, countJ, MPI_DOUBLE, commJ_);
+  #endif
 
   // Make Y pencils contiguous in Y
   {
-    const int iLo = idi*di+idp*dip;
-    const int iHi = std::min({iLo+dip,(idi+1)*di,ni});
-    const int kLo = idjq*dhq;
-    const int kHi = std::min(kLo+dhq,nh);
+    const int iLo = idi * di + idp * dip;
+    const int iHi = std::min({iLo + dip, (idi + 1) * di, ni});
+    const int kLo = idjq * dhq;
+    const int kHi = std::min(kLo + dhq, nh);
     gpuFor(
-      kHi-kLo,iHi-iLo,mj,mq,djq,
-      GPU_LAMBDA(const int k, const int i, const int r, const int q, const int j) {
-        const int rdj = r*dj;
-        const int jj = rdj+q*djq+j;
-        if ((jj < nj) && (jj < rdj+dj)) {
-          const int ia = jj+nj*(i+dip*k);
-          const int ib = k+dhq*(j+djq*(i+dip*(q+mq*r)));
-          ac[ia] = bc[ib];
-        }
-      });
+        kHi - kLo, iHi - iLo, mj, mq, djq, GPU_LAMBDA(const int k, const int i, const int r, const int q, const int j) {
+          const int rdj = r * dj;
+          const int jj  = rdj + q * djq + j;
+          if ((jj < nj) && (jj < rdj + dj)) {
+            const int ia = jj + nj * (i + dip * k);
+            const int ib = k + dhq * (j + djq * (i + dip * (q + mq * r)));
+            ac[ia]       = bc[ib];
+          }
+        });
   }
 
   // Forward FFT in Y
-  CHECK(cufftExecZ2Z(c2cj_,ac,bc,CUFFT_FORWARD));
+  GPU_Error_Check(cufftExecZ2Z(c2cj_, ac, bc, CUFFT_FORWARD));
 
   // Rearrange for X redistribution
   {
-    const int iLo = idi*di+idp*dip;
-    const int iHi = std::min({iLo+dip,(idi+1)*di,ni});
-    const int kLo = idjq*dhq;
-    const int kHi = std::min(kLo+dhq,nh);
+    const int iLo = idi * di + idp * dip;
+    const int iHi = std::min({iLo + dip, (idi + 1) * di, ni});
+    const int kLo = idjq * dhq;
+    const int kHi = std::min(kLo + dhq, nh);
     gpuFor(
-      mip,kHi-kLo,iHi-iLo,djp,
-      GPU_LAMBDA(const int p, const int k, const int i, const int j) {
-        const int jj = p*djp+j;
-        if (jj < nj) {
-          const int ia = j+djp*(i+dip*(k+dhq*p));
-          const int ib = jj+nj*(i+dip*k);
-          ac[ia] = bc[ib];
-        }
-      });
+        mip, kHi - kLo, iHi - iLo, djp, GPU_LAMBDA(const int p, const int k, const int i, const int j) {
+          const int jj = p * djp + j;
+          if (jj < nj) {
+            const int ia = j + djp * (i + dip * (k + dhq * p));
+            const int ib = jj + nj * (i + dip * k);
+            ac[ia]       = bc[ib];
+          }
+        });
   }
 
   // Redistribute for X pencils
-  const int countI = 2*dip*djp*dhq;
-#ifndef MPI_GPU
-  CHECK(cudaMemcpy(ha_,a,bytes,cudaMemcpyDeviceToHost));
-  MPI_Alltoall(ha_,countI,MPI_DOUBLE,hb_,countI,MPI_DOUBLE,commI_);
-  CHECK(cudaMemcpy(b,hb_,bytes,cudaMemcpyHostToDevice));
-#else
-  CHECK(cudaDeviceSynchronize());
-  MPI_Alltoall(a,countI,MPI_DOUBLE,b,countI,MPI_DOUBLE,commI_);
-#endif
+  const int countI = 2 * dip * djp * dhq;
+  #ifndef MPI_GPU
+  GPU_Error_Check(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost));
+  MPI_Alltoall(ha_, countI, MPI_DOUBLE, hb_, countI, MPI_DOUBLE, commI_);
+  GPU_Error_Check(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice));
+  #else
+  GPU_Error_Check(cudaDeviceSynchronize());
+  MPI_Alltoall(a, countI, MPI_DOUBLE, b, countI, MPI_DOUBLE, commI_);
+  #endif
 
   // Make X pencils contiguous in X
   {
-    const int jLo = idip*djp;
-    const int jHi = std::min(jLo+djp,nj);
-    const int kLo = idjq*dhq;
-    const int kHi = std::min(kLo+dhq,nh);
+    const int jLo = idip * djp;
+    const int jHi = std::min(jLo + djp, nj);
+    const int kLo = idjq * dhq;
+    const int kHi = std::min(kLo + dhq, nh);
     gpuFor(
-      jHi-jLo,kHi-kLo,mi,mp,dip,
-      GPU_LAMBDA(const int j, const int k, const int r, const int p, const int i) {
-        const int rdi = r*di;
-        const int ii = rdi+p*dip+i;
-        if ((ii < ni) && (ii < rdi+di)) {
-          const int ia = ii+ni*(k+dhq*j);
-          const int ib = j+djp*(i+dip*(k+dhq*(p+mp*r)));
-          ac[ia] = bc[ib];
-        }
-      });
+        jHi - jLo, kHi - kLo, mi, mp, dip, GPU_LAMBDA(const int j, const int k, const int r, const int p, const int i) {
+          const int rdi = r * di;
+          const int ii  = rdi + p * dip + i;
+          if ((ii < ni) && (ii < rdi + di)) {
+            const int ia = ii + ni * (k + dhq * j);
+            const int ib = j + djp * (i + dip * (k + dhq * (p + mp * r)));
+            ac[ia]       = bc[ib];
+          }
+        });
   }
 
   // Forward FFT in X
-  CHECK(cufftExecZ2Z(c2ci_,ac,bc,CUFFT_FORWARD));
+  GPU_Error_Check(cufftExecZ2Z(c2ci_, ac, bc, CUFFT_FORWARD));
 
   // Apply filter in frequency space distributed in X pencils
 
-  const int jLo = idip*djp;
-  const int jHi = std::min(jLo+djp,nj);
-  const int kLo = idjq*dhq;
-  const int kHi = std::min(kLo+dhq,nh);
+  const int jLo = idip * djp;
+  const int jHi = std::min(jLo + djp, nj);
+  const int kLo = idjq * dhq;
+  const int kHi = std::min(kLo + dhq, nh);
 
   gpuFor(
-    jHi-jLo,kHi-kLo,ni,
-    GPU_LAMBDA(const int j0, const int k0, const int i) {
-      const int j = jLo+j0;
-      const int k = kLo+k0;
-      const int iab = i+ni*(k0+dhq*j0);
-      ac[iab] = f(i,j,k,bc[iab]);
-    });
+      jHi - jLo, kHi - kLo, ni, GPU_LAMBDA(const int j0, const int k0, const int i) {
+        const int j   = jLo + j0;
+        const int k   = kLo + k0;
+        const int iab = i + ni * (k0 + dhq * j0);
+        ac[iab]       = f(i, j, k, bc[iab]);
+      });
 
   // Backward FFT in X
-  CHECK(cufftExecZ2Z(c2ci_,ac,bc,CUFFT_INVERSE));
+  GPU_Error_Check(cufftExecZ2Z(c2ci_, ac, bc, CUFFT_INVERSE));
 
   // Rearrange for Y redistribution
   {
-    const int jLo = idip*djp;
-    const int jHi = std::min(jLo+djp,nj);
-    const int kLo = idjq*dhq;
-    const int kHi = std::min(kLo+dhq,nh);
+    const int jLo = idip * djp;
+    const int jHi = std::min(jLo + djp, nj);
+    const int kLo = idjq * dhq;
+    const int kHi = std::min(kLo + dhq, nh);
     gpuFor(
-      mi,mp,jHi-jLo,kHi-kLo,dip,
-      GPU_LAMBDA(const int r, const int p, const int j, const int k, const int i) {
-        const int rdi = r*di;
-        const int ii = rdi+p*dip+i;
-        if ((ii < ni) && (ii < rdi+di)) {
-          const int ia = i+dip*(k+dhq*(j+djp*(p+mp*r)));
-          const int ib = ii+ni*(k+dhq*j);
-          ac[ia] = bc[ib];
-        }
-      });
+        mi, mp, jHi - jLo, kHi - kLo, dip, GPU_LAMBDA(const int r, const int p, const int j, const int k, const int i) {
+          const int rdi = r * di;
+          const int ii  = rdi + p * dip + i;
+          if ((ii < ni) && (ii < rdi + di)) {
+            const int ia = i + dip * (k + dhq * (j + djp * (p + mp * r)));
+            const int ib = ii + ni * (k + dhq * j);
+            ac[ia]       = bc[ib];
+          }
+        });
   }
 
   // Redistribute for Y pencils
-#ifndef MPI_GPU
-  CHECK(cudaMemcpy(ha_,a,bytes,cudaMemcpyDeviceToHost));
-  MPI_Alltoall(ha_,countI,MPI_DOUBLE,hb_,countI,MPI_DOUBLE,commI_);
-  CHECK(cudaMemcpy(b,hb_,bytes,cudaMemcpyHostToDevice));
-#else
-  CHECK(cudaDeviceSynchronize());
-  MPI_Alltoall(a,countI,MPI_DOUBLE,b,countI,MPI_DOUBLE,commI_);
-#endif
+  #ifndef MPI_GPU
+  GPU_Error_Check(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost));
+  MPI_Alltoall(ha_, countI, MPI_DOUBLE, hb_, countI, MPI_DOUBLE, commI_);
+  GPU_Error_Check(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice));
+  #else
+  GPU_Error_Check(cudaDeviceSynchronize());
+  MPI_Alltoall(a, countI, MPI_DOUBLE, b, countI, MPI_DOUBLE, commI_);
+  #endif
 
   // Make Y pencils contiguous in Y
   {
-    const int iLo = idi*di+idp*dip;
-    const int iHi = std::min({iLo+dip,(idi+1)*di,ni});
-    const int kLo = idjq*dhq;
-    const int kHi = std::min(kLo+dhq,nh);
+    const int iLo = idi * di + idp * dip;
+    const int iHi = std::min({iLo + dip, (idi + 1) * di, ni});
+    const int kLo = idjq * dhq;
+    const int kHi = std::min(kLo + dhq, nh);
     gpuFor(
-      kHi-kLo,iHi-iLo,mip,djp,
-      GPU_LAMBDA(const int k, const int i, const int p, const int j) {
-        const int jj = p*djp+j;
-        if (jj < nj) {
-          const int ia = jj+nj*(i+dip*k);
-          const int ib = i+dip*(k+dhq*(j+djp*p));
-          ac[ia] = bc[ib];
-        }
-      });
+        kHi - kLo, iHi - iLo, mip, djp, GPU_LAMBDA(const int k, const int i, const int p, const int j) {
+          const int jj = p * djp + j;
+          if (jj < nj) {
+            const int ia = jj + nj * (i + dip * k);
+            const int ib = i + dip * (k + dhq * (j + djp * p));
+            ac[ia]       = bc[ib];
+          }
+        });
   }
 
   // Backward FFT in Y
-  CHECK(cufftExecZ2Z(c2cj_,ac,bc,CUFFT_INVERSE));
+  GPU_Error_Check(cufftExecZ2Z(c2cj_, ac, bc, CUFFT_INVERSE));
 
   // Rearrange for Z redistribution
   {
-    const int iLo = idi*di+idp*dip;
-    const int iHi = std::min({iLo+dip,(idi+1)*di,ni});
-    const int kLo = idjq*dhq;
-    const int kHi = std::min(kLo+dhq,nh);
+    const int iLo = idi * di + idp * dip;
+    const int iHi = std::min({iLo + dip, (idi + 1) * di, ni});
+    const int kLo = idjq * dhq;
+    const int kHi = std::min(kLo + dhq, nh);
     gpuFor(
-      mj,mq,kHi-kLo,iHi-iLo,djq,
-      GPU_LAMBDA(const int r, const int q, const int k, const int i, const int j) {
-        const int rdj = r*dj;
-        const int jj = rdj+q*djq+j;
-        if ((jj < nj) && (jj < rdj+dj)) {
-          const int ia = j+djq*(i+dip*(k+dhq*(q+mq*r)));
-          const int ib = jj+nj*(i+dip*k);
-          ac[ia] = bc[ib];
-        }
-      });
+        mj, mq, kHi - kLo, iHi - iLo, djq, GPU_LAMBDA(const int r, const int q, const int k, const int i, const int j) {
+          const int rdj = r * dj;
+          const int jj  = rdj + q * djq + j;
+          if ((jj < nj) && (jj < rdj + dj)) {
+            const int ia = j + djq * (i + dip * (k + dhq * (q + mq * r)));
+            const int ib = jj + nj * (i + dip * k);
+            ac[ia]       = bc[ib];
+          }
+        });
   }
 
   // Redistribute in Z pencils
-#ifndef MPI_GPU
-  CHECK(cudaMemcpy(ha_,a,bytes,cudaMemcpyDeviceToHost));
-  MPI_Alltoall(ha_,countJ,MPI_DOUBLE,hb_,countJ,MPI_DOUBLE,commJ_);
-  CHECK(cudaMemcpy(b,hb_,bytes,cudaMemcpyHostToDevice));
-#else
-  CHECK(cudaDeviceSynchronize());
-  MPI_Alltoall(a,countJ,MPI_DOUBLE,b,countJ,MPI_DOUBLE,commJ_);
-#endif
+  #ifndef MPI_GPU
+  GPU_Error_Check(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost));
+  MPI_Alltoall(ha_, countJ, MPI_DOUBLE, hb_, countJ, MPI_DOUBLE, commJ_);
+  GPU_Error_Check(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice));
+  #else
+  GPU_Error_Check(cudaDeviceSynchronize());
+  MPI_Alltoall(a, countJ, MPI_DOUBLE, b, countJ, MPI_DOUBLE, commJ_);
+  #endif
 
   // Make Z pencils contiguous in Z
   {
-    const int iLo = idi*di+idp*dip;
-    const int iHi = std::min({iLo+dip,(idi+1)*di,ni});
-    const int jLo = idj*dj+idq*djq;
-    const int jHi = std::min({jLo+djq,(idj+1)*dj,nj});
+    const int iLo = idi * di + idp * dip;
+    const int iHi = std::min({iLo + dip, (idi + 1) * di, ni});
+    const int jLo = idj * dj + idq * djq;
+    const int jHi = std::min({jLo + djq, (idj + 1) * dj, nj});
     gpuFor(
-      iHi-iLo,jHi-jLo,mjq,dhq,
-      GPU_LAMBDA(const int i, const int j, const int q, const int k) {
-        const int kk = q*dhq+k;
-        if (kk < nh) {
-          const int ia = kk+nh*(j+djq*i);
-          const int ib = j+djq*(i+dip*(k+dhq*q));
-          ac[ia] = bc[ib];
-        }
-      });
+        iHi - iLo, jHi - jLo, mjq, dhq, GPU_LAMBDA(const int i, const int j, const int q, const int k) {
+          const int kk = q * dhq + k;
+          if (kk < nh) {
+            const int ia = kk + nh * (j + djq * i);
+            const int ib = j + djq * (i + dip * (k + dhq * q));
+            ac[ia]       = bc[ib];
+          }
+        });
   }
 
   // Complex-to-real FFT in Z
-  CHECK(cufftExecZ2D(c2rk_,ac,b));
+  GPU_Error_Check(cufftExecZ2D(c2rk_, ac, b));
 
   // Rearrange for 3D-block redistribution
   {
-    const int iLo = idi*di+idp*dip;
-    const int iHi = std::min({iLo+dip,(idi+1)*di,ni});
-    const int jLo = idj*dj+idq*djq;
-    const int jHi = std::min({jLo+djq,(idj+1)*dj,nj});
+    const int iLo = idi * di + idp * dip;
+    const int iHi = std::min({iLo + dip, (idi + 1) * di, ni});
+    const int jLo = idj * dj + idq * djq;
+    const int jHi = std::min({jLo + djq, (idj + 1) * dj, nj});
     gpuFor(
-      mk,iHi-iLo,jHi-jLo,dk,
-      GPU_LAMBDA(const int pq, const int i, const int j, const int k) {
-        const int kk = pq*dk+k;
-        if (kk < nk) {
-          const int ia = k+dk*(j+djq*(i+dip*pq));
-          const int ib = kk+nk*(j+djq*i);
-          a[ia] = b[ib];
-        }
-      });
+        mk, iHi - iLo, jHi - jLo, dk, GPU_LAMBDA(const int pq, const int i, const int j, const int k) {
+          const int kk = pq * dk + k;
+          if (kk < nk) {
+            const int ia = k + dk * (j + djq * (i + dip * pq));
+            const int ib = kk + nk * (j + djq * i);
+            a[ia]        = b[ib];
+          }
+        });
   }
 
   // Redistribute for 3D blocks
-#ifndef MPI_GPU
-  CHECK(cudaMemcpy(ha_,a,bytes,cudaMemcpyDeviceToHost));
-  MPI_Alltoall(ha_,countK,MPI_DOUBLE,hb_,countK,MPI_DOUBLE,commK_);
-  CHECK(cudaMemcpy(b,hb_,bytes,cudaMemcpyHostToDevice));
-#else
-  CHECK(cudaDeviceSynchronize());
-  MPI_Alltoall(a,countK,MPI_DOUBLE,b,countK,MPI_DOUBLE,commK_);
-#endif
+  #ifndef MPI_GPU
+  GPU_Error_Check(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost));
+  MPI_Alltoall(ha_, countK, MPI_DOUBLE, hb_, countK, MPI_DOUBLE, commK_);
+  GPU_Error_Check(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice));
+  #else
+  GPU_Error_Check(cudaDeviceSynchronize());
+  MPI_Alltoall(a, countK, MPI_DOUBLE, b, countK, MPI_DOUBLE, commK_);
+  #endif
 
   // Rearrange into 3D blocks and apply FFT normalization
   {
-    const double divN = 1.0/(double(ni)*double(nj)*double(nk));
-    const int kLo = idk*dk;
-    const int kHi = std::min(kLo+dk,nk);
+    const double divN = 1.0 / (double(ni) * double(nj) * double(nk));
+    const int kLo     = idk * dk;
+    const int kHi     = std::min(kLo + dk, nk);
     gpuFor(
-      mp,dip,mq,djq,kHi-kLo,
-      GPU_LAMBDA(const int p, const int i, const int q, const int j, const int k) {
-        const int ii = p*dip+i;
-        const int jj = q*djq+j;
-        if ((ii < di) && (jj < dj)) {
-          const int ia = k+dk*(jj+dj*ii);
-          const int ib = k+dk*(j+djq*(i+dip*(q+mq*p)));
-          a[ia] = divN*b[ib];
-        }
-      });
+        mp, dip, mq, djq, kHi - kLo, GPU_LAMBDA(const int p, const int i, const int q, const int j, const int k) {
+          const int ii = p * dip + i;
+          const int jj = q * djq + j;
+          if ((ii < di) && (jj < dj)) {
+            const int ia = k + dk * (jj + dj * ii);
+            const int ib = k + dk * (j + djq * (i + dip * (q + mq * p)));
+            a[ia]        = divN * b[ib];
+          }
+        });
   }
 }
 
 #endif
-
diff --git a/src/gravity/paris/ParisPeriodic.cu b/src/gravity/paris/ParisPeriodic.cu
index 671b42aef..0b2e5ef5a 100644
--- a/src/gravity/paris/ParisPeriodic.cu
+++ b/src/gravity/paris/ParisPeriodic.cu
@@ -1,31 +1,32 @@
 #ifdef PARIS
 
-#include "ParisPeriodic.hpp"
+  #include <cmath>
 
-#include <cmath>
+  #include "ParisPeriodic.hpp"
 
-__host__ __device__ static inline double sqr(const double x) { return x*x; }
+__host__ __device__ static inline double Sqr(const double x) { return x * x; }
 
-ParisPeriodic::ParisPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]):
-  ni_(n[0]),
-  nj_(n[1]),
-#ifdef PARIS_3PT
-  nk_(n[2]),
-  ddi_(2.0*double(n[0]-1)/(hi[0]-lo[0])),
-  ddj_(2.0*double(n[1]-1)/(hi[1]-lo[1])),
-  ddk_(2.0*double(n[2]-1)/(hi[2]-lo[2])),
-#elif defined PARIS_5PT
-  nk_(n[2]),
-  ddi_(sqr(double(n[0]-1)/(hi[0]-lo[0]))/6.0),
-  ddj_(sqr(double(n[1]-1)/(hi[1]-lo[1]))/6.0),
-  ddk_(sqr(double(n[2]-1)/(hi[2]-lo[2]))/6.0),
-#else
-  ddi_{2.0*M_PI*double(n[0]-1)/(double(n[0])*(hi[0]-lo[0]))},
-  ddj_{2.0*M_PI*double(n[1]-1)/(double(n[1])*(hi[1]-lo[1]))},
-  ddk_{2.0*M_PI*double(n[2]-1)/(double(n[2])*(hi[2]-lo[2]))},
-#endif
-  henry(n,lo,hi,m,id)
-{ }
+ParisPeriodic::ParisPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3])
+    : ni_(n[0]),
+      nj_(n[1]),
+  #ifdef PARIS_3PT
+      nk_(n[2]),
+      ddi_(2.0 * double(n[0] - 1) / (hi[0] - lo[0])),
+      ddj_(2.0 * double(n[1] - 1) / (hi[1] - lo[1])),
+      ddk_(2.0 * double(n[2] - 1) / (hi[2] - lo[2])),
+  #elif defined PARIS_5PT
+      nk_(n[2]),
+      ddi_(Sqr(double(n[0] - 1) / (hi[0] - lo[0])) / 6.0),
+      ddj_(Sqr(double(n[1] - 1) / (hi[1] - lo[1])) / 6.0),
+      ddk_(Sqr(double(n[2] - 1) / (hi[2] - lo[2])) / 6.0),
+  #else
+      ddi_{2.0 * M_PI * double(n[0] - 1) / (double(n[0]) * (hi[0] - lo[0]))},
+      ddj_{2.0 * M_PI * double(n[1] - 1) / (double(n[1]) * (hi[1] - lo[1]))},
+      ddk_{2.0 * M_PI * double(n[2] - 1) / (double(n[2]) * (hi[2] - lo[2]))},
+  #endif
+      henry(n, lo, hi, m, id)
+{
+}
 
 void ParisPeriodic::solve(const size_t bytes, double *const density, double *const potential) const
 {
@@ -34,44 +35,44 @@ void ParisPeriodic::solve(const size_t bytes, double *const density, double *con
   const double ddi = ddi_, ddj = ddj_, ddk = ddk_;
 
   // Poisson-solve constants that depend on divergence-operator approximation
-#ifdef PARIS_3PT
-  const int nk = nk_;
-  const double si = M_PI/double(ni);
-  const double sj = M_PI/double(nj);
-  const double sk = M_PI/double(nk);
-#elif defined PARIS_5PT
-  const int nk = nk_;
-  const double si = 2.0*M_PI/double(ni);
-  const double sj = 2.0*M_PI/double(nj);
-  const double sk = 2.0*M_PI/double(nk);
-#endif
+  #ifdef PARIS_3PT
+  const int nk    = nk_;
+  const double si = M_PI / double(ni);
+  const double sj = M_PI / double(nj);
+  const double sk = M_PI / double(nk);
+  #elif defined PARIS_5PT
+  const int nk    = nk_;
+  const double si = 2.0 * M_PI / double(ni);
+  const double sj = 2.0 * M_PI / double(nj);
+  const double sk = 2.0 * M_PI / double(nk);
+  #endif
 
   // Provide FFT filter with a lambda that does Poisson solve in frequency space
-  henry.filter(bytes,density,potential,
-    [=] __device__ (const int i, const int j, const int k, const cufftDoubleComplex b) {
-      if (i || j || k) {
-#ifdef PARIS_3PT
-        const double i2 = sqr(sin(double(min(i,ni-i))*si)*ddi);
-        const double j2 = sqr(sin(double(min(j,nj-j))*sj)*ddj);
-        const double k2 = sqr(sin(double(k)*sk)*ddk);
-#elif defined PARIS_5PT
-        const double ci = cos(double(min(i,ni-i))*si);
-        const double cj = cos(double(min(j,nj-j))*sj);
-        const double ck = cos(double(k)*sk);
-        const double i2 = ddi*(2.0*ci*ci-16.0*ci+14.0);
-        const double j2 = ddj*(2.0*cj*cj-16.0*cj+14.0);
-        const double k2 = ddk*(2.0*ck*ck-16.0*ck+14.0);
-#else
-        const double i2 = sqr(double(min(i,ni-i))*ddi);
-        const double j2 = sqr(double(min(j,nj-j))*ddj);
-        const double k2 = sqr(double(k)*ddk);
-#endif
-        const double d = -1.0/(i2+j2+k2);
-        return cufftDoubleComplex{d*b.x,d*b.y};
-      } else {
-        return cufftDoubleComplex{0.0,0.0};
-      }
-    });
+  henry.filter(bytes, density, potential,
+               [=] __device__(const int i, const int j, const int k, const cufftDoubleComplex b) {
+                 if (i || j || k) {
+  #ifdef PARIS_3PT
+                   const double i2 = Sqr(sin(double(min(i, ni - i)) * si) * ddi);
+                   const double j2 = Sqr(sin(double(min(j, nj - j)) * sj) * ddj);
+                   const double k2 = Sqr(sin(double(k) * sk) * ddk);
+  #elif defined PARIS_5PT
+          const double ci = cos(double(min(i, ni - i)) * si);
+          const double cj = cos(double(min(j, nj - j)) * sj);
+          const double ck = cos(double(k) * sk);
+          const double i2 = ddi * (2.0 * ci * ci - 16.0 * ci + 14.0);
+          const double j2 = ddj * (2.0 * cj * cj - 16.0 * cj + 14.0);
+          const double k2 = ddk * (2.0 * ck * ck - 16.0 * ck + 14.0);
+  #else
+          const double i2 = Sqr(double(min(i, ni - i)) * ddi);
+          const double j2 = Sqr(double(min(j, nj - j)) * ddj);
+          const double k2 = Sqr(double(k) * ddk);
+  #endif
+                   const double d = -1.0 / (i2 + j2 + k2);
+                   return cufftDoubleComplex{d * b.x, d * b.y};
+                 } else {
+                   return cufftDoubleComplex{0.0, 0.0};
+                 }
+               });
 }
 
 #endif
diff --git a/src/gravity/paris/ParisPeriodic.hpp b/src/gravity/paris/ParisPeriodic.hpp
index 92b07becd..8069cde65 100644
--- a/src/gravity/paris/ParisPeriodic.hpp
+++ b/src/gravity/paris/ParisPeriodic.hpp
@@ -5,44 +5,49 @@
 /**
  * @brief Periodic Poisson solver using @ref Henry FFT filter.
  */
-class ParisPeriodic {
-  public:
+class ParisPeriodic
+{
+ public:
+  /**
+   * @param[in] n[3] { Global number of cells in each dimension, without ghost
+   * cells. }
+   * @param[in] lo[3] { Physical location of the global lower bound of each
+   * dimension. }
+   * @param[in] hi[3] { Physical location of the global upper bound of each
+   * dimension, minus one grid cell. The one-cell difference is because of the
+   * periodic domain. See @ref PotentialParis3D::Initialize for an example
+   * computation of these arguments. }
+   * @param[in] m[3] { Number of MPI tasks in each dimension. }
+   * @param[in] id[3] { Coordinates of this MPI task, starting at `{0,0,0}`. }
+   */
+  ParisPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]);
 
-    /**
-     * @param[in] n[3] { Global number of cells in each dimension, without ghost cells. }
-     * @param[in] lo[3] { Physical location of the global lower bound of each dimension. }
-     * @param[in] hi[3] { Physical location of the global upper bound of each dimension, minus one grid cell.
-     *                     The one-cell difference is because of the periodic domain.
-     *                     See @ref Potential_Paris_3D::Initialize for an example computation of these arguments. }
-     * @param[in] m[3] { Number of MPI tasks in each dimension. }
-     * @param[in] id[3] { Coordinates of this MPI task, starting at `{0,0,0}`. }
-     */
-    ParisPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]);
+  /**
+   * @return { Number of bytes needed for array arguments for @ref solve. }
+   */
+  size_t bytes() const { return henry.bytes(); }
 
-    /**
-     * @return { Number of bytes needed for array arguments for @ref solve. }
-     */
-    size_t bytes() const { return henry.bytes(); }
+  /**
+   * @detail { Solves the Poisson equation for the potential derived from the
+   * provided density. Assumes periodic boundary conditions. Assumes fields have
+   * no ghost cells. Uses a 3D FFT provided by the @ref Henry class. }
+   * @param[in] bytes { Number of bytes allocated for arguments @ref density and
+   * @ref potential. Used to ensure that the arrays have enough extra work
+   * space. }
+   * @param[in,out] density { Input density field. Modified as a work array.
+   *                          Must be at least @ref bytes() bytes, likely larger
+   * than the original field. }
+   * @param[out] potential { Output potential. Modified as a work array.
+   *                         Must be at least @ref bytes() bytes, likely larger
+   * than the actual output field. }
+   */
+  void solve(size_t bytes, double *density, double *potential) const;
 
-    /**
-     * @detail { Solves the Poisson equation for the potential derived from the provided density.
-     *           Assumes periodic boundary conditions.
-     *           Assumes fields have no ghost cells.
-     *           Uses a 3D FFT provided by the @ref Henry class. }
-     * @param[in] bytes { Number of bytes allocated for arguments @ref density and @ref potential.
-     *                    Used to ensure that the arrays have enough extra work space. }
-     * @param[in,out] density { Input density field. Modified as a work array.
-     *                          Must be at least @ref bytes() bytes, likely larger than the original field. }
-     * @param[out] potential { Output potential. Modified as a work array.
-     *                         Must be at least @ref bytes() bytes, likely larger than the actual output field. }
-     */
-    void solve(size_t bytes, double *density, double *potential) const;
-
-  private:
-    int ni_,nj_; //!< Number of elements in X and Y dimensions
+ private:
+  int ni_, nj_;  //!< Number of elements in X and Y dimensions
 #if defined(PARIS_3PT) || defined(PARIS_5PT)
-    int nk_; //!< Number of elements in Z dimension
+  int nk_;  //!< Number of elements in Z dimension
 #endif
-    double ddi_,ddj_,ddk_; //!< Frequency-independent terms in Poisson solve
-    HenryPeriodic henry; //!< FFT filter object
+  double ddi_, ddj_, ddk_;  //!< Frequency-independent terms in Poisson solve
+  HenryPeriodic henry;      //!< FFT filter object
 };
diff --git a/src/gravity/paris/PoissonZero3DBlockedGPU.cu b/src/gravity/paris/PoissonZero3DBlockedGPU.cu
index 29093e2a3..84e070160 100644
--- a/src/gravity/paris/PoissonZero3DBlockedGPU.cu
+++ b/src/gravity/paris/PoissonZero3DBlockedGPU.cu
@@ -1,115 +1,123 @@
 #ifdef PARIS_GALACTIC
 
-#include "PoissonZero3DBlockedGPU.hpp"
+  #include <algorithm>
+  #include <cassert>
+  #include <cmath>
+  #include <cstdio>
+  #include <cstdlib>
 
-#include <algorithm>
-#include <cassert>
-#include <cmath>
-#include <cstdio>
-#include <cstdlib>
+  #include "PoissonZero3DBlockedGPU.hpp"
 
 static constexpr double sqrt2 = 0.4142135623730950488016887242096980785696718753769480731766797379;
 
-static inline __host__ __device__ double sqr(const double x) { return x*x; }
+static inline __host__ __device__ double Sqr(const double x) { return x * x; }
 
-PoissonZero3DBlockedGPU::PoissonZero3DBlockedGPU(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]):
-#ifdef PARIS_GALACTIC_3PT
-  ddi_(2.0*double(n[0]-1)/(hi[0]-lo[0])),
-  ddj_(2.0*double(n[1]-1)/(hi[1]-lo[1])),
-  ddk_(2.0*double(n[2]-1)/(hi[2]-lo[2])),
-#elif defined PARIS_GALACTIC_5PT
-  ddi_(sqr(double(n[0]-1)/(hi[0]-lo[0]))/6.0),
-  ddj_(sqr(double(n[1]-1)/(hi[1]-lo[1]))/6.0),
-  ddk_(sqr(double(n[2]-1)/(hi[2]-lo[2]))/6.0),
-#else
-  ddi_{M_PI*double(n[0]-1)/(double(n[0])*(hi[0]-lo[0]))},
-  ddj_{M_PI*double(n[1]-1)/(double(n[1])*(hi[1]-lo[1]))},
-  ddk_{M_PI*double(n[2]-1)/(double(n[2])*(hi[2]-lo[2]))},
-#endif
-  idi_(id[0]),
-  idj_(id[1]),
-  idk_(id[2]),
-  mi_(m[0]),
-  mj_(m[1]),
-  mk_(m[2]),
-  ni_(n[0]),
-  nj_(n[1]),
-  nk_(n[2])
+PoissonZero3DBlockedGPU::PoissonZero3DBlockedGPU(const int n[3], const double lo[3], const double hi[3], const int m[3],
+                                                 const int id[3])
+    :
+  #ifdef PARIS_GALACTIC_3PT
+      ddi_(2.0 * double(n[0] - 1) / (hi[0] - lo[0])),
+      ddj_(2.0 * double(n[1] - 1) / (hi[1] - lo[1])),
+      ddk_(2.0 * double(n[2] - 1) / (hi[2] - lo[2])),
+  #elif defined PARIS_GALACTIC_5PT
+      ddi_(Sqr(double(n[0] - 1) / (hi[0] - lo[0])) / 6.0),
+      ddj_(Sqr(double(n[1] - 1) / (hi[1] - lo[1])) / 6.0),
+      ddk_(Sqr(double(n[2] - 1) / (hi[2] - lo[2])) / 6.0),
+  #else
+      ddi_{M_PI * double(n[0] - 1) / (double(n[0]) * (hi[0] - lo[0]))},
+      ddj_{M_PI * double(n[1] - 1) / (double(n[1]) * (hi[1] - lo[1]))},
+      ddk_{M_PI * double(n[2] - 1) / (double(n[2]) * (hi[2] - lo[2]))},
+  #endif
+      idi_(id[0]),
+      idj_(id[1]),
+      idk_(id[2]),
+      mi_(m[0]),
+      mj_(m[1]),
+      mk_(m[2]),
+      ni_(n[0]),
+      nj_(n[1]),
+      nk_(n[2])
 {
-  mq_ = int(round(sqrt(mk_)));
-  while (mk_%mq_) mq_--;
-  mp_ = mk_/mq_;
-  assert(mp_*mq_ == mk_);
+  mq_ = int(round(Sqr(mk_)));
+  while (mk_ % mq_) {
+    mq_--;
+  }
+  mp_ = mk_ / mq_;
+  assert(mp_ * mq_ == mk_);
 
-  idp_ = idk_/mq_;
-  idq_ = idk_%mq_;
+  idp_ = idk_ / mq_;
+  idq_ = idk_ % mq_;
 
   {
-    const int color = idi_*mj_+idj_;
-    const int key = idk_;
-    MPI_Comm_split(MPI_COMM_WORLD,color,key,&commK_);
+    const int color = idi_ * mj_ + idj_;
+    const int key   = idk_;
+    MPI_Comm_split(MPI_COMM_WORLD, color, key, &commK_);
   }
   {
-    const int color = idi_*mp_+idp_;
-    const int key = idj_*mq_+idq_;
-    MPI_Comm_split(MPI_COMM_WORLD,color,key,&commJ_);
+    const int color = idi_ * mp_ + idp_;
+    const int key   = idj_ * mq_ + idq_;
+    MPI_Comm_split(MPI_COMM_WORLD, color, key, &commJ_);
   }
   {
-    const int color = idj_*mq_+idq_;
-    const int key = idi_*mp_+idp_;
-    MPI_Comm_split(MPI_COMM_WORLD,color,key,&commI_);
+    const int color = idj_ * mq_ + idq_;
+    const int key   = idi_ * mp_ + idp_;
+    MPI_Comm_split(MPI_COMM_WORLD, color, key, &commI_);
   }
-  di_ = (ni_+mi_-1)/mi_;
-  dj_ = (nj_+mj_-1)/mj_;
-  dk_ = (nk_+mk_-1)/mk_;
+  di_ = (ni_ + mi_ - 1) / mi_;
+  dj_ = (nj_ + mj_ - 1) / mj_;
+  dk_ = (nk_ + mk_ - 1) / mk_;
 
-  dip_ = (di_+mp_-1)/mp_;
-  djq_ = (dj_+mq_-1)/mq_;
-  const int mjq = mj_*mq_;
-  dkq_ = (nk_+mjq-1)/mjq;
-  const int mip = mi_*mp_;
-  djp_ = (nj_+mip-1)/mip;
+  dip_          = (di_ + mp_ - 1) / mp_;
+  djq_          = (dj_ + mq_ - 1) / mq_;
+  const int mjq = mj_ * mq_;
+  dkq_          = (nk_ + mjq - 1) / mjq;
+  const int mip = mi_ * mp_;
+  djp_          = (nj_ + mip - 1) / mip;
 
-  ni2_ = 2*(ni_/2+1);
-  nj2_ = 2*(nj_/2+1);
-  nk2_ = 2*(nk_/2+1);
+  ni2_ = 2 * (ni_ / 2 + 1);
+  nj2_ = 2 * (nj_ / 2 + 1);
+  nk2_ = 2 * (nk_ / 2 + 1);
 
-  const long nMax = std::max({di_*dj_*dk_,dip_*djq_*mk_*dk_,dip_*mp_*djq_*mq_*dk_,dip_*djq_*nk2_,dip_*djq_*mjq*dkq_,dip_*dkq_*nj2_,dip_*dkq_*mip*djp_,dkq_*djp_*mip*dip_,dkq_*djp_*ni2_});
-  bytes_ = nMax*sizeof(double);
+  const long nMax = std::max({di_ * dj_ * dk_, dip_ * djq_ * mk_ * dk_, dip_ * mp_ * djq_ * mq_ * dk_,
+                              dip_ * djq_ * nk2_, dip_ * djq_ * mjq * dkq_, dip_ * dkq_ * nj2_,
+                              dip_ * dkq_ * mip * djp_, dkq_ * djp_ * mip * dip_, dkq_ * djp_ * ni2_});
+  bytes_          = nMax * sizeof(double);
 
-  int nkh = nk_/2+1;
-  CHECK(cufftPlanMany(&d2zk_,1,&nk_,&nk_,1,nk_,&nkh,1,nkh,CUFFT_D2Z,dip_*djq_));
-  int njh = nj_/2+1;
-  CHECK(cufftPlanMany(&d2zj_,1,&nj_,&nj_,1,nj_,&njh,1,njh,CUFFT_D2Z,dip_*dkq_));
-  int nih = ni_/2+1;
-  CHECK(cufftPlanMany(&d2zi_,1,&ni_,&ni_,1,ni_,&nih,1,nih,CUFFT_D2Z,dkq_*djp_));
-#ifndef MPI_GPU
-  CHECK(cudaHostAlloc(&ha_,bytes_+bytes_,cudaHostAllocDefault));
+  int nkh = nk_ / 2 + 1;
+  GPU_Error_Check(cufftPlanMany(&d2zk_, 1, &nk_, &nk_, 1, nk_, &nkh, 1, nkh, CUFFT_D2Z, dip_ * djq_));
+  int njh = nj_ / 2 + 1;
+  GPU_Error_Check(cufftPlanMany(&d2zj_, 1, &nj_, &nj_, 1, nj_, &njh, 1, njh, CUFFT_D2Z, dip_ * dkq_));
+  int nih = ni_ / 2 + 1;
+  GPU_Error_Check(cufftPlanMany(&d2zi_, 1, &ni_, &ni_, 1, ni_, &nih, 1, nih, CUFFT_D2Z, dkq_ * djp_));
+  #ifndef MPI_GPU
+  GPU_Error_Check(cudaHostAlloc(&ha_, bytes_ + bytes_, cudaHostAllocDefault));
   assert(ha_);
-  hb_ = ha_+nMax;
-#endif
+  hb_ = ha_ + nMax;
+  #endif
 }
 
 PoissonZero3DBlockedGPU::~PoissonZero3DBlockedGPU()
 {
-#ifndef MPI_GPU
-  CHECK(cudaFreeHost(ha_));
+  #ifndef MPI_GPU
+  GPU_Error_Check(cudaFreeHost(ha_));
   ha_ = hb_ = nullptr;
-#endif
-  CHECK(cufftDestroy(d2zi_));
-  CHECK(cufftDestroy(d2zj_));
-  CHECK(cufftDestroy(d2zk_));
+  #endif
+  GPU_Error_Check(cufftDestroy(d2zi_));
+  GPU_Error_Check(cufftDestroy(d2zj_));
+  GPU_Error_Check(cufftDestroy(d2zk_));
   MPI_Comm_free(&commI_);
   MPI_Comm_free(&commJ_);
   MPI_Comm_free(&commK_);
 }
 
-void print(const char *const title, const int ni, const int nj, const int nk, const double *const v)
+void Print(const char *const title, const int ni, const int nj, const int nk, const double *const v)
 {
-  printf("%s:\n",title);
+  printf("%s:\n", title);
   for (int i = 0; i < ni; i++) {
     for (int j = 0; j < nj; j++) {
-      for (int k = 0; k < nk; k++) printf("%.6f ",v[(i*nj+j)*nk+k]);
+      for (int k = 0; k < nk; k++) {
+        printf("%.6f ", v[(i * nj + j) * nk + k]);
+      }
       printf("  ");
     }
     printf("\n");
@@ -125,400 +133,392 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, dou
 
   double *const ua = potential;
   double *const ub = density;
-  cufftDoubleComplex *const uc = reinterpret_cast<cufftDoubleComplex*>(ub);
+  auto *const uc   = reinterpret_cast<cufftDoubleComplex *>(ub);
 
   const double ddi = ddi_;
   const double ddj = ddj_;
   const double ddk = ddk_;
-  const int di = di_;
-  const int dip = dip_;
-  const int dj = dj_;
-  const int djp = djp_;
-  const int djq = djq_;
-  const int dk = dk_;
-  const int dkq = dkq_;
-  const int idi = idi_;
-  const int idj = idj_;
-  const int idp = idp_;
-  const int idq = idq_;
-  const int mp = mp_;
-  const int mq = mq_;
-  const int ni = ni_;
-  const int ni2 = ni2_;
-  const int nj = nj_;
-  const int nj2 = nj2_;
-  const int nk = nk_;
-  const int nk2 = nk2_;
+  const int di     = di_;
+  const int dip    = dip_;
+  const int dj     = dj_;
+  const int djp    = djp_;
+  const int djq    = djq_;
+  const int dk     = dk_;
+  const int dkq    = dkq_;
+  const int idi    = idi_;
+  const int idj    = idj_;
+  const int idp    = idp_;
+  const int idq    = idq_;
+  const int mp     = mp_;
+  const int mq     = mq_;
+  const int ni     = ni_;
+  const int ni2    = ni2_;
+  const int nj     = nj_;
+  const int nj2    = nj2_;
+  const int nk     = nk_;
+  const int nk2    = nk2_;
 
   gpuFor(
-    mp,mq,dip,djq,dk,
-    GPU_LAMBDA(const int p, const int q, const int i, const int j, const int k) {
-      const int iLo = p*dip;
-      const int jLo = q*djq;
-      if ((i+iLo < di) && (j+jLo < dj)) ua[(((p*mq+q)*dip+i)*djq+j)*dk+k] = ub[((i+iLo)*dj+j+jLo)*dk+k];
-    });
-#ifndef MPI_GPU
-  CHECK(cudaMemcpy(ha_,ua,bytes_,cudaMemcpyDeviceToHost));
-  MPI_Alltoall(ha_,dip*djq*dk,MPI_DOUBLE,hb_,dip*djq*dk,MPI_DOUBLE,commK_);
-  CHECK(cudaMemcpyAsync(ub,hb_,bytes_,cudaMemcpyHostToDevice,0));
-#else
-  CHECK(cudaDeviceSynchronize());
-  MPI_Alltoall(ua,dip*djq*dk,MPI_DOUBLE,ub,dip*djq*dk,MPI_DOUBLE,commK_);
-#endif
+      mp, mq, dip, djq, dk, GPU_LAMBDA(const int p, const int q, const int i, const int j, const int k) {
+        const int iLo = p * dip;
+        const int jLo = q * djq;
+        if ((i + iLo < di) && (j + jLo < dj)) {
+          ua[(((p * mq + q) * dip + i) * djq + j) * dk + k] = ub[((i + iLo) * dj + j + jLo) * dk + k];
+        }
+      });
+  #ifndef MPI_GPU
+  GPU_Error_Check(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost));
+  MPI_Alltoall(ha_, dip * djq * dk, MPI_DOUBLE, hb_, dip * djq * dk, MPI_DOUBLE, commK_);
+  GPU_Error_Check(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0));
+  #else
+  GPU_Error_Check(cudaDeviceSynchronize());
+  MPI_Alltoall(ua, dip * djq * dk, MPI_DOUBLE, ub, dip * djq * dk, MPI_DOUBLE, commK_);
+  #endif
   gpuFor(
-    dip,djq,nk/2+1,
-    GPU_LAMBDA(const int i, const int j, const int k) {
-      const int ij = (i*djq+j)*nk;
-      const int kk = k+k;
-      if (k == 0) {
-        ua[ij] = ub[(i*djq+j)*dk];
-      } else if (kk == nk) {
-        const int pq = (nk-1)/dk;
-        const int kpq = (nk-1)%dk;
-        ua[ij+k] = -ub[((pq*dip+i)*djq+j)*dk+kpq];
-      } else {
-        const int pqa = (kk-1)/dk;
-        const int kka = (kk-1)%dk;
-        ua[ij+(nk-k)] = -ub[((pqa*dip+i)*djq+j)*dk+kka];
-        const int pqb = kk/dk;
-        const int kkb = kk%dk;
-        ua[ij+k] = ub[((pqb*dip+i)*djq+j)*dk+kkb];
-      }
-    });
-  CHECK(cufftExecD2Z(d2zk_,ua,uc));
+      dip, djq, nk / 2 + 1, GPU_LAMBDA(const int i, const int j, const int k) {
+        const int ij = (i * djq + j) * nk;
+        const int kk = k + k;
+        if (k == 0) {
+          ua[ij] = ub[(i * djq + j) * dk];
+        } else if (kk == nk) {
+          const int pq  = (nk - 1) / dk;
+          const int kpq = (nk - 1) % dk;
+          ua[ij + k]    = -ub[((pq * dip + i) * djq + j) * dk + kpq];
+        } else {
+          const int pqa     = (kk - 1) / dk;
+          const int kka     = (kk - 1) % dk;
+          ua[ij + (nk - k)] = -ub[((pqa * dip + i) * djq + j) * dk + kka];
+          const int pqb     = kk / dk;
+          const int kkb     = kk % dk;
+          ua[ij + k]        = ub[((pqb * dip + i) * djq + j) * dk + kkb];
+        }
+      });
+  GPU_Error_Check(cufftExecD2Z(d2zk_, ua, uc));
   gpuFor(
-    dip,nk/2+1,djq,
-    GPU_LAMBDA(const int i, const int k, const int j) {
-      if (k == 0) {
-        const int q0 = (nk-1)/dkq;
-        const int k0 = (nk-1)%dkq;
-        ua[((q0*dip+i)*dkq+k0)*djq+j] = 2.0*ub[(i*djq+j)*nk2];
-      } else if (k+k == nk) {
-        const int qa = (nk/2-1)/dkq;
-        const int ka = (nk/2-1)%dkq;
-        ua[((qa*dip+i)*dkq+ka)*djq+j] = sqrt2*ub[(i*djq+j)*nk2+nk];
-      } else {
-        const int qa = (nk-k-1)/dkq;
-        const int ka = (nk-k-1)%dkq;
-        const int qb = (k-1)/dkq;
-        const int kb = (k-1)%dkq;
-        const double ak = 2.0*ub[(i*djq+j)*nk2+2*k];
-        const double bk = 2.0*ub[(i*djq+j)*nk2+2*k+1];
-        double wa,wb;
-        sincospi(double(k)/double(nk+nk),&wb,&wa);
-        ua[((qa*dip+i)*dkq+ka)*djq+j] = wa*ak+wb*bk;
-        ua[((qb*dip+i)*dkq+kb)*djq+j] = wb*ak-wa*bk;
-      }
-    });
-#ifndef MPI_GPU
-  CHECK(cudaMemcpy(ha_,ua,bytes_,cudaMemcpyDeviceToHost));
-  MPI_Alltoall(ha_,dip*dkq*djq,MPI_DOUBLE,hb_,dip*dkq*djq,MPI_DOUBLE,commJ_);
-  CHECK(cudaMemcpyAsync(ub,hb_,bytes_,cudaMemcpyHostToDevice,0));
-#else
-  CHECK(cudaDeviceSynchronize());
-  MPI_Alltoall(ua,dip*dkq*djq,MPI_DOUBLE,ub,dip*dkq*djq,MPI_DOUBLE,commJ_);
-#endif
+      dip, nk / 2 + 1, djq, GPU_LAMBDA(const int i, const int k, const int j) {
+        if (k == 0) {
+          const int q0                              = (nk - 1) / dkq;
+          const int k0                              = (nk - 1) % dkq;
+          ua[((q0 * dip + i) * dkq + k0) * djq + j] = 2.0 * ub[(i * djq + j) * nk2];
+        } else if (k + k == nk) {
+          const int qa                              = (nk / 2 - 1) / dkq;
+          const int ka                              = (nk / 2 - 1) % dkq;
+          ua[((qa * dip + i) * dkq + ka) * djq + j] = sqrt2 * ub[(i * djq + j) * nk2 + nk];
+        } else {
+          const int qa    = (nk - k - 1) / dkq;
+          const int ka    = (nk - k - 1) % dkq;
+          const int qb    = (k - 1) / dkq;
+          const int kb    = (k - 1) % dkq;
+          const double ak = 2.0 * ub[(i * djq + j) * nk2 + 2 * k];
+          const double bk = 2.0 * ub[(i * djq + j) * nk2 + 2 * k + 1];
+          double wa, wb;
+          sincospi(double(k) / double(nk + nk), &wb, &wa);
+          ua[((qa * dip + i) * dkq + ka) * djq + j] = wa * ak + wb * bk;
+          ua[((qb * dip + i) * dkq + kb) * djq + j] = wb * ak - wa * bk;
+        }
+      });
+  #ifndef MPI_GPU
+  GPU_Error_Check(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost));
+  MPI_Alltoall(ha_, dip * dkq * djq, MPI_DOUBLE, hb_, dip * dkq * djq, MPI_DOUBLE, commJ_);
+  GPU_Error_Check(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0));
+  #else
+  GPU_Error_Check(cudaDeviceSynchronize());
+  MPI_Alltoall(ua, dip * dkq * djq, MPI_DOUBLE, ub, dip * dkq * djq, MPI_DOUBLE, commJ_);
+  #endif
   gpuFor(
-    dip,dkq,nj/2+1,
-    GPU_LAMBDA(const int i, const int k, const int j) {
-      const int ik = (i*dkq+k)*nj;
-      if (j == 0) {
-        ua[ik] = ub[(i*dkq+k)*djq];
-      } else if (j+j == nj) {
-        const int qa = (nj-1)/djq;
-        const int ja = (nj-1)%djq;
-        ua[ik+nj/2] = -ub[((qa*dip+i)*dkq+k)*djq+ja];
-      } else {
-        const int qa = (j+j-1)/djq;
-        const int ja = (j+j-1)%djq;
-        ua[ik+nj-j] = -ub[((qa*dip+i)*dkq+k)*djq+ja];
-        const int qb = (j+j)/djq;
-        const int jb = (j+j)%djq;
-        ua[ik+j] = ub[((qb*dip+i)*dkq+k)*djq+jb];
-      }
-    });
-  CHECK(cufftExecD2Z(d2zj_,ua,uc));
+      dip, dkq, nj / 2 + 1, GPU_LAMBDA(const int i, const int k, const int j) {
+        const int ik = (i * dkq + k) * nj;
+        if (j == 0) {
+          ua[ik] = ub[(i * dkq + k) * djq];
+        } else if (j + j == nj) {
+          const int qa    = (nj - 1) / djq;
+          const int ja    = (nj - 1) % djq;
+          ua[ik + nj / 2] = -ub[((qa * dip + i) * dkq + k) * djq + ja];
+        } else {
+          const int qa    = (j + j - 1) / djq;
+          const int ja    = (j + j - 1) % djq;
+          ua[ik + nj - j] = -ub[((qa * dip + i) * dkq + k) * djq + ja];
+          const int qb    = (j + j) / djq;
+          const int jb    = (j + j) % djq;
+          ua[ik + j]      = ub[((qb * dip + i) * dkq + k) * djq + jb];
+        }
+      });
+  GPU_Error_Check(cufftExecD2Z(d2zj_, ua, uc));
   gpuFor(
-    dkq,nj/2+1,dip,
-    GPU_LAMBDA(const int k, const int j, const int i) {
-      if (j == 0) {
-        const int pa = (nj-1)/djp;
-        const int ja = (nj-1)%djp;
-        ua[((pa*dkq+k)*djp+ja)*dip+i] = 2.0*ub[(i*dkq+k)*nj2];
-      } else if (j+j == nj) {
-        const int pa = (nj/2-1)/djp;
-        const int ja = (nj/2-1)%djp;
-        ua[((pa*dkq+k)*djp+ja)*dip+i] = sqrt2*ub[(i*dkq+k)*nj2+nj];
-      } else {
-        const double aj = 2.0*ub[(i*dkq+k)*nj2+2*j];
-        const double bj = 2.0*ub[(i*dkq+k)*nj2+2*j+1];
-        double wa,wb;
-        sincospi(double(j)/double(nj+nj),&wb,&wa);
-        const int pa = (nj-j-1)/djp;
-        const int ja = (nj-j-1)%djp;
-        const int pb = (j-1)/djp;
-        const int jb = (j-1)%djp;
-        ua[((pa*dkq+k)*djp+ja)*dip+i] = wa*aj+wb*bj;
-        ua[((pb*dkq+k)*djp+jb)*dip+i] = wb*aj-wa*bj;
-      }
-    });
-#ifndef MPI_GPU
-  CHECK(cudaMemcpy(ha_,ua,bytes_,cudaMemcpyDeviceToHost));
-  MPI_Alltoall(ha_,dkq*djp*dip,MPI_DOUBLE,hb_,dkq*djp*dip,MPI_DOUBLE,commI_);
-  CHECK(cudaMemcpyAsync(ub,hb_,bytes_,cudaMemcpyHostToDevice,0));
-#else
-  CHECK(cudaDeviceSynchronize());
-  MPI_Alltoall(ua,dkq*djp*dip,MPI_DOUBLE,ub,dkq*djp*dip,MPI_DOUBLE,commI_);
-#endif
+      dkq, nj / 2 + 1, dip, GPU_LAMBDA(const int k, const int j, const int i) {
+        if (j == 0) {
+          const int pa                              = (nj - 1) / djp;
+          const int ja                              = (nj - 1) % djp;
+          ua[((pa * dkq + k) * djp + ja) * dip + i] = 2.0 * ub[(i * dkq + k) * nj2];
+        } else if (j + j == nj) {
+          const int pa                              = (nj / 2 - 1) / djp;
+          const int ja                              = (nj / 2 - 1) % djp;
+          ua[((pa * dkq + k) * djp + ja) * dip + i] = sqrt2 * ub[(i * dkq + k) * nj2 + nj];
+        } else {
+          const double aj = 2.0 * ub[(i * dkq + k) * nj2 + 2 * j];
+          const double bj = 2.0 * ub[(i * dkq + k) * nj2 + 2 * j + 1];
+          double wa, wb;
+          sincospi(double(j) / double(nj + nj), &wb, &wa);
+          const int pa                              = (nj - j - 1) / djp;
+          const int ja                              = (nj - j - 1) % djp;
+          const int pb                              = (j - 1) / djp;
+          const int jb                              = (j - 1) % djp;
+          ua[((pa * dkq + k) * djp + ja) * dip + i] = wa * aj + wb * bj;
+          ua[((pb * dkq + k) * djp + jb) * dip + i] = wb * aj - wa * bj;
+        }
+      });
+  #ifndef MPI_GPU
+  GPU_Error_Check(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost));
+  MPI_Alltoall(ha_, dkq * djp * dip, MPI_DOUBLE, hb_, dkq * djp * dip, MPI_DOUBLE, commI_);
+  GPU_Error_Check(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0));
+  #else
+  GPU_Error_Check(cudaDeviceSynchronize());
+  MPI_Alltoall(ua, dkq * djp * dip, MPI_DOUBLE, ub, dkq * djp * dip, MPI_DOUBLE, commI_);
+  #endif
   gpuFor(
-    dkq,djp,ni/2+1,
-    GPU_LAMBDA(const int k, const int j, const int i) {
-      const int kj = (k*djp+j)*ni;
-      if (i == 0) {
-        ua[kj] = ub[(k*djp+j)*dip];
-      } else if (i+i == ni) {
-        const int ida = (ni-1)/di;
-        const int pa = (ni-1)%di/dip;
-        const int ia = ni-1-ida*di-pa*dip;
-        ua[kj+ni/2] = -ub[(((ida*mp+pa)*dkq+k)*djp+j)*dip+ia];
-      } else {
-        const int ida = (i+i-1)/di;
-        const int pa = (i+i-1)%di/dip;
-        const int ia = i+i-1-ida*di-pa*dip;
-        ua[kj+ni-i] = -ub[(((ida*mp+pa)*dkq+k)*djp+j)*dip+ia];
-        const int idb = (i+i)/di;
-        const int pb = (i+i)%di/dip;
-        const int ib = i+i-idb*di-pb*dip;
-        ua[kj+i] = ub[(((idb*mp+pb)*dkq+k)*djp+j)*dip+ib];
-      }
-    });
-  CHECK(cufftExecD2Z(d2zi_,ua,uc));
-  {
-#ifdef PARIS_GALACTIC_3PT
-    const double si = M_PI/double(ni+ni);
-    const double sj = M_PI/double(nj+nj);
-    const double sk = M_PI/double(nk+nk);
-    const double iin = sqr(sin(double(ni)*si)*ddi);
-#elif defined PARIS_GALACTIC_5PT
-    const double si = M_PI/double(ni);
-    const double sj = M_PI/double(nj);
-    const double sk = M_PI/double(nk);
-    const double cin = cos(double(ni)*si);
-    const double iin = ddi*(2.0*cin*cin-16.0*cin+14.0);
-#else
-    const double iin = sqr(double(ni)*ddi);
-#endif
-    const int jLo = (idi*mp+idp)*djp;
-    const int kLo = (idj*mq+idq)*dkq;
-    gpuFor(
-      dkq,djp,ni/2+1,
-      GPU_LAMBDA(const int k, const int j, const int i) {
-        const int kj = (k*djp+j)*ni;
-        const int kj2 = (k*djp+j)*ni2;
-#ifdef PARIS_GALACTIC_3PT
-        const double jjkk = sqr(sin(double(jLo+j+1)*sj)*ddj)+sqr(sin(double(kLo+k+1)*sk)*ddk);
-#elif defined PARIS_GALACTIC_5PT
-        const double cj = cos(double(jLo+j+1)*sj);
-        const double jj = ddj*(2.0*cj*cj-16.0*cj+14.0);
-        const double ck = cos(double(kLo+k+1)*sk);
-        const double kk = ddk*(2.0*ck*ck-16.0*ck+14.0);
-        const double jjkk = jj+kk;
-#else
-        const double jjkk = sqr(double(jLo+j+1)*ddj)+sqr(double(kLo+k+1)*ddk);
-#endif
+      dkq, djp, ni / 2 + 1, GPU_LAMBDA(const int k, const int j, const int i) {
+        const int kj = (k * djp + j) * ni;
         if (i == 0) {
-          ua[kj] = -2.0*ub[kj2]/(iin+jjkk);
+          ua[kj] = ub[(k * djp + j) * dip];
+        } else if (i + i == ni) {
+          const int ida   = (ni - 1) / di;
+          const int pa    = (ni - 1) % di / dip;
+          const int ia    = ni - 1 - ida * di - pa * dip;
+          ua[kj + ni / 2] = -ub[(((ida * mp + pa) * dkq + k) * djp + j) * dip + ia];
         } else {
-#ifdef PARIS_GALACTIC_3PT
-          const double ii = sqr(sin(double(i)*si)*ddi);
-#elif defined PARIS_GALACTIC_5PT
-          const double ci = cos(double(i)*si);
-          const double ii = ddi*(2.0*ci*ci-16.0*ci+14.0);
-#else
-          const double ii = sqr(double(i)*ddi);
-#endif
-          if (i+i == ni) {
-            ua[kj+ni/2] = -2.0*ub[kj2+ni]/(ii+jjkk);
-          } else {
-            const double ai = 2.0*ub[kj2+2*i];
-            const double bi = 2.0*ub[kj2+2*i+1];
-            double wa,wb;
-            sincospi(double(i)/double(ni+ni),&wb,&wa);
-#ifdef PARIS_GALACTIC_3PT
-            const double nii = sqr(sin(double(ni-i)*si)*ddi);
-#elif defined PARIS_GALACTIC_5PT
-            const double cni = cos(double(ni-i)*si);
-            const double nii = ddi*(2.0*cni*cni-16.0*cni+14.0);
-#else
-            const double nii = sqr(double(ni-i)*ddi);
-#endif
-            const double aai = -(wa*ai+wb*bi)/(nii+jjkk);
-            const double bbi = (wa*bi-wb*ai)/(ii+jjkk);
-            const double apb = aai+bbi;
-            const double amb = aai-bbi;
-            ua[kj+i] = wa*amb+wb*apb;
-            ua[kj+ni-i] = wa*apb-wb*amb;
-          }
+          const int ida   = (i + i - 1) / di;
+          const int pa    = (i + i - 1) % di / dip;
+          const int ia    = i + i - 1 - ida * di - pa * dip;
+          ua[kj + ni - i] = -ub[(((ida * mp + pa) * dkq + k) * djp + j) * dip + ia];
+          const int idb   = (i + i) / di;
+          const int pb    = (i + i) % di / dip;
+          const int ib    = i + i - idb * di - pb * dip;
+          ua[kj + i]      = ub[(((idb * mp + pb) * dkq + k) * djp + j) * dip + ib];
         }
       });
+  GPU_Error_Check(cufftExecD2Z(d2zi_, ua, uc));
+  {
+  #ifdef PARIS_GALACTIC_3PT
+    const double si  = M_PI / double(ni + ni);
+    const double sj  = M_PI / double(nj + nj);
+    const double sk  = M_PI / double(nk + nk);
+    const double iin = Sqr(sin(double(ni) * si) * ddi);
+  #elif defined PARIS_GALACTIC_5PT
+    const double si  = M_PI / double(ni);
+    const double sj  = M_PI / double(nj);
+    const double sk  = M_PI / double(nk);
+    const double cin = cos(double(ni) * si);
+    const double iin = ddi * (2.0 * cin * cin - 16.0 * cin + 14.0);
+  #else
+    const double iin = Sqr(double(ni) * ddi);
+  #endif
+    const int jLo = (idi * mp + idp) * djp;
+    const int kLo = (idj * mq + idq) * dkq;
+    gpuFor(
+        dkq, djp, ni / 2 + 1, GPU_LAMBDA(const int k, const int j, const int i) {
+          const int kj  = (k * djp + j) * ni;
+          const int kj2 = (k * djp + j) * ni2;
+  #ifdef PARIS_GALACTIC_3PT
+          const double jjkk = Sqr(sin(double(jLo + j + 1) * sj) * ddj) + Sqr(sin(double(kLo + k + 1) * sk) * ddk);
+  #elif defined PARIS_GALACTIC_5PT
+          const double cj   = cos(double(jLo + j + 1) * sj);
+          const double jj   = ddj * (2.0 * cj * cj - 16.0 * cj + 14.0);
+          const double ck   = cos(double(kLo + k + 1) * sk);
+          const double kk   = ddk * (2.0 * ck * ck - 16.0 * ck + 14.0);
+          const double jjkk = jj + kk;
+  #else
+          const double jjkk =
+              Sqr(double(jLo + j + 1) * ddj) + Sqr(double(kLo + k + 1) * ddk);
+  #endif
+          if (i == 0) {
+            ua[kj] = -2.0 * ub[kj2] / (iin + jjkk);
+          } else {
+  #ifdef PARIS_GALACTIC_3PT
+            const double ii = Sqr(sin(double(i) * si) * ddi);
+  #elif defined PARIS_GALACTIC_5PT
+            const double ci = cos(double(i) * si);
+            const double ii = ddi * (2.0 * ci * ci - 16.0 * ci + 14.0);
+  #else
+            const double ii = Sqr(double(i) * ddi);
+  #endif
+            if (i + i == ni) {
+              ua[kj + ni / 2] = -2.0 * ub[kj2 + ni] / (ii + jjkk);
+            } else {
+              const double ai = 2.0 * ub[kj2 + 2 * i];
+              const double bi = 2.0 * ub[kj2 + 2 * i + 1];
+              double wa, wb;
+              sincospi(double(i) / double(ni + ni), &wb, &wa);
+  #ifdef PARIS_GALACTIC_3PT
+              const double nii = t(sin(double(ni - i) * si) * ddi);
+  #elif defined PARIS_GALACTIC_5PT
+              const double cni = cos(double(ni - i) * si);
+              const double nii = ddi * (2.0 * cni * cni - 16.0 * cni + 14.0);
+  #else
+              const double nii = Sqr(double(ni - i) * ddi);
+  #endif
+              const double aai = -(wa * ai + wb * bi) / (nii + jjkk);
+              const double bbi = (wa * bi - wb * ai) / (ii + jjkk);
+              const double apb = aai + bbi;
+              const double amb = aai - bbi;
+              ua[kj + i]       = wa * amb + wb * apb;
+              ua[kj + ni - i]  = wa * apb - wb * amb;
+            }
+          }
+        });
   }
-  CHECK(cufftExecD2Z(d2zi_,ua,uc));
+  GPU_Error_Check(cufftExecD2Z(d2zi_, ua, uc));
   gpuFor(
-    dkq,ni/2+1,djp,
-    GPU_LAMBDA(const int k, const int i, const int j) {
-      if (i == 0) {
-        ua[k*dip*djp+j] = ub[(k*djp+j)*ni2];
-      } else if (i+i == ni) {
-        const int ida = (ni-1)/di;
-        const int pa = (ni-1)%di/dip;
-        const int ia = ni-1-ida*di-pa*dip;
-        ua[(((ida*mp+pa)*dkq+k)*dip+ia)*djp+j] = -ub[(k*djp+j)*ni2+ni];
-      } else {
-        const double ai = ub[(k*djp+j)*ni2+i+i];
-        const double bi = ub[(k*djp+j)*ni2+i+i+1];
-        const int ida = (i+i-1)/di;
-        const int pa = (i+i-1)%di/dip;
-        const int ia = i+i-1-ida*di-pa*dip;
-        ua[(((ida*mp+pa)*dkq+k)*dip+ia)*djp+j] = bi-ai;
-        const int idb = (i+i)/di;
-        const int pb = (i+i)%di/dip;
-        const int ib = i+i-idb*di-pb*dip;
-        ua[(((idb*mp+pb)*dkq+k)*dip+ib)*djp+j] = ai+bi;
-      }
-    });
-#ifndef MPI_GPU
-  CHECK(cudaMemcpy(ha_,ua,bytes_,cudaMemcpyDeviceToHost));
-  MPI_Alltoall(ha_,dkq*djp*dip,MPI_DOUBLE,hb_,dkq*djp*dip,MPI_DOUBLE,commI_);
-  CHECK(cudaMemcpyAsync(ub,hb_,bytes_,cudaMemcpyHostToDevice,0));
-#else
-  CHECK(cudaDeviceSynchronize());
-  MPI_Alltoall(ua,dkq*djp*dip,MPI_DOUBLE,ub,dkq*djp*dip,MPI_DOUBLE,commI_);
-#endif
+      dkq, ni / 2 + 1, djp, GPU_LAMBDA(const int k, const int i, const int j) {
+        if (i == 0) {
+          ua[k * dip * djp + j] = ub[(k * djp + j) * ni2];
+        } else if (i + i == ni) {
+          const int ida                                          = (ni - 1) / di;
+          const int pa                                           = (ni - 1) % di / dip;
+          const int ia                                           = ni - 1 - ida * di - pa * dip;
+          ua[(((ida * mp + pa) * dkq + k) * dip + ia) * djp + j] = -ub[(k * djp + j) * ni2 + ni];
+        } else {
+          const double ai                                        = ub[(k * djp + j) * ni2 + i + i];
+          const double bi                                        = ub[(k * djp + j) * ni2 + i + i + 1];
+          const int ida                                          = (i + i - 1) / di;
+          const int pa                                           = (i + i - 1) % di / dip;
+          const int ia                                           = i + i - 1 - ida * di - pa * dip;
+          ua[(((ida * mp + pa) * dkq + k) * dip + ia) * djp + j] = bi - ai;
+          const int idb                                          = (i + i) / di;
+          const int pb                                           = (i + i) % di / dip;
+          const int ib                                           = i + i - idb * di - pb * dip;
+          ua[(((idb * mp + pb) * dkq + k) * dip + ib) * djp + j] = ai + bi;
+        }
+      });
+  #ifndef MPI_GPU
+  GPU_Error_Check(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost));
+  MPI_Alltoall(ha_, dkq * djp * dip, MPI_DOUBLE, hb_, dkq * djp * dip, MPI_DOUBLE, commI_);
+  GPU_Error_Check(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0));
+  #else
+  GPU_Error_Check(cudaDeviceSynchronize());
+  MPI_Alltoall(ua, dkq * djp * dip, MPI_DOUBLE, ub, dkq * djp * dip, MPI_DOUBLE, commI_);
+  #endif
   gpuFor(
-    dkq,dip,nj/2+1,
-    GPU_LAMBDA(const int k, const int i, const int j) {
-      const long ki = (k*dip+i)*nj;
-      if (j == 0) {
-        const int pa = (nj-1)/djp;
-        const int ja = (nj-1)-pa*djp;
-        ua[ki] = ub[((pa*dkq+k)*dip+i)*djp+ja];
-      } else if (j+j == nj) {
-        const int pa = (nj/2-1)/djp;
-        const int ja = nj/2-1-pa*djp;
-        ua[ki+nj/2] = sqrt2*ub[((pa*dkq+k)*dip+i)*djp+ja];
-      } else {
-        const int pa = (nj-1-j)/djp;
-        const int ja = nj-1-j-pa*djp;
-        const double aj = ub[((pa*dkq+k)*dip+i)*djp+ja];
-        const int pb = (j-1)/djp;
-        const int jb = j-1-pb*djp;
-        const double bj = ub[((pb*dkq+k)*dip+i)*djp+jb];
-        const double apb = aj+bj;
-        const double amb = aj-bj;
-        double wa,wb;
-        sincospi(double(j)/double(nj+nj),&wb,&wa);
-        ua[ki+j] = wa*amb+wb*apb;
-        ua[ki+nj-j] = wa*apb-wb*amb;
-      }
-    });
-  CHECK(cufftExecD2Z(d2zj_,ua,uc));
+      dkq, dip, nj / 2 + 1, GPU_LAMBDA(const int k, const int i, const int j) {
+        const long ki = (k * dip + i) * nj;
+        if (j == 0) {
+          const int pa = (nj - 1) / djp;
+          const int ja = (nj - 1) - pa * djp;
+          ua[ki]       = ub[((pa * dkq + k) * dip + i) * djp + ja];
+        } else if (j + j == nj) {
+          const int pa    = (nj / 2 - 1) / djp;
+          const int ja    = nj / 2 - 1 - pa * djp;
+          ua[ki + nj / 2] = sqrt2 * ub[((pa * dkq + k) * dip + i) * djp + ja];
+        } else {
+          const int pa     = (nj - 1 - j) / djp;
+          const int ja     = nj - 1 - j - pa * djp;
+          const double aj  = ub[((pa * dkq + k) * dip + i) * djp + ja];
+          const int pb     = (j - 1) / djp;
+          const int jb     = j - 1 - pb * djp;
+          const double bj  = ub[((pb * dkq + k) * dip + i) * djp + jb];
+          const double apb = aj + bj;
+          const double amb = aj - bj;
+          double wa, wb;
+          sincospi(double(j) / double(nj + nj), &wb, &wa);
+          ua[ki + j]      = wa * amb + wb * apb;
+          ua[ki + nj - j] = wa * apb - wb * amb;
+        }
+      });
+  GPU_Error_Check(cufftExecD2Z(d2zj_, ua, uc));
   gpuFor(
-    dip,nj/2+1,dkq,
-    GPU_LAMBDA(const int i, const int j, const int k) {
-      if (j == 0) {
-        ua[i*djq*dkq+k] = ub[(k*dip+i)*nj2];
-      } else if (j+j == nj) {
-        const int ida = (nj-1)/dj;
-        const int qa = (nj-1)%dj/djq;
-        const int ja = nj-1-ida*dj-qa*djq;
-        ua[(((ida*mq+qa)*dip+i)*djq+ja)*dkq+k] = -ub[(k*dip+i)*nj2+nj];
-      } else {
-        const int jj = j+j;
-        const int ida = (jj-1)/dj;
-        const int qa = (jj-1)%dj/djq;
-        const int ja = jj-1-ida*dj-qa*djq;
-        const int idb = jj/dj;
-        const int qb = jj%dj/djq;
-        const int jb = jj-idb*dj-qb*djq;
-        const double aj = ub[(k*dip+i)*nj2+jj];
-        const double bj = ub[(k*dip+i)*nj2+jj+1];
-        ua[(((ida*mq+qa)*dip+i)*djq+ja)*dkq+k] = bj-aj;
-        ua[(((idb*mq+qb)*dip+i)*djq+jb)*dkq+k] = aj+bj;
-      }
-    });
-#ifndef MPI_GPU
-  CHECK(cudaMemcpy(ha_,ua,bytes_,cudaMemcpyDeviceToHost));
-  MPI_Alltoall(ha_,dip*djq*dkq,MPI_DOUBLE,hb_,dip*djq*dkq,MPI_DOUBLE,commJ_);
-  CHECK(cudaMemcpyAsync(ub,hb_,bytes_,cudaMemcpyHostToDevice,0));
-#else
-  CHECK(cudaDeviceSynchronize());
-  MPI_Alltoall(ua,dip*djq*dkq,MPI_DOUBLE,ub,dip*djq*dkq,MPI_DOUBLE,commJ_);
-#endif
+      dip, nj / 2 + 1, dkq, GPU_LAMBDA(const int i, const int j, const int k) {
+        if (j == 0) {
+          ua[i * djq * dkq + k] = ub[(k * dip + i) * nj2];
+        } else if (j + j == nj) {
+          const int ida                                          = (nj - 1) / dj;
+          const int qa                                           = (nj - 1) % dj / djq;
+          const int ja                                           = nj - 1 - ida * dj - qa * djq;
+          ua[(((ida * mq + qa) * dip + i) * djq + ja) * dkq + k] = -ub[(k * dip + i) * nj2 + nj];
+        } else {
+          const int jj                                           = j + j;
+          const int ida                                          = (jj - 1) / dj;
+          const int qa                                           = (jj - 1) % dj / djq;
+          const int ja                                           = jj - 1 - ida * dj - qa * djq;
+          const int idb                                          = jj / dj;
+          const int qb                                           = jj % dj / djq;
+          const int jb                                           = jj - idb * dj - qb * djq;
+          const double aj                                        = ub[(k * dip + i) * nj2 + jj];
+          const double bj                                        = ub[(k * dip + i) * nj2 + jj + 1];
+          ua[(((ida * mq + qa) * dip + i) * djq + ja) * dkq + k] = bj - aj;
+          ua[(((idb * mq + qb) * dip + i) * djq + jb) * dkq + k] = aj + bj;
+        }
+      });
+  #ifndef MPI_GPU
+  GPU_Error_Check(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost));
+  MPI_Alltoall(ha_, dip * djq * dkq, MPI_DOUBLE, hb_, dip * djq * dkq, MPI_DOUBLE, commJ_);
+  GPU_Error_Check(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0));
+  #else
+  GPU_Error_Check(cudaDeviceSynchronize());
+  MPI_Alltoall(ua, dip * djq * dkq, MPI_DOUBLE, ub, dip * djq * dkq, MPI_DOUBLE, commJ_);
+  #endif
   gpuFor(
-    dip,djq,nk/2+1,
-    GPU_LAMBDA(const int i, const int j, const int k) {
-      const long ij = (i*djq+j)*nk;
-      if (k == 0) {
-        const int qa = (nk-1)/dkq;
-        const int ka = nk-1-qa*dkq;
-        ua[ij] = ub[((qa*dip+i)*djq+j)*dkq+ka];
-      } else if (k+k == nk) {
-        const int qa = (nk/2-1)/dkq;
-        const int ka = nk/2-1-qa*dkq;
-        ua[ij+nk/2] = sqrt2*ub[((qa*dip+i)*djq+j)*dkq+ka];
-      } else {
-        const int qa = (nk-1-k)/dkq;
-        const int ka = nk-1-k-qa*dkq;
-        const double ak = ub[((qa*dip+i)*djq+j)*dkq+ka];
-        const int qb = (k-1)/dkq;
-        const int kb = k-1-qb*dkq;
-        const double bk = ub[((qb*dip+i)*djq+j)*dkq+kb];
-        const double apb = ak+bk;
-        const double amb = ak-bk;
-        double wa,wb;
-        sincospi(double(k)/double(nk+nk),&wb,&wa);
-        ua[ij+k] = wa*amb+wb*apb;
-        ua[ij+nk-k] = wa*apb-wb*amb;
-      }
-    });
-  CHECK(cufftExecD2Z(d2zk_,ua,uc));
-  const double divN = 1.0/(8.0*double(ni)*double(nj)*double(nk));
+      dip, djq, nk / 2 + 1, GPU_LAMBDA(const int i, const int j, const int k) {
+        const long ij = (i * djq + j) * nk;
+        if (k == 0) {
+          const int qa = (nk - 1) / dkq;
+          const int ka = nk - 1 - qa * dkq;
+          ua[ij]       = ub[((qa * dip + i) * djq + j) * dkq + ka];
+        } else if (k + k == nk) {
+          const int qa    = (nk / 2 - 1) / dkq;
+          const int ka    = nk / 2 - 1 - qa * dkq;
+          ua[ij + nk / 2] = sqrt2 * ub[((qa * dip + i) * djq + j) * dkq + ka];
+        } else {
+          const int qa     = (nk - 1 - k) / dkq;
+          const int ka     = nk - 1 - k - qa * dkq;
+          const double ak  = ub[((qa * dip + i) * djq + j) * dkq + ka];
+          const int qb     = (k - 1) / dkq;
+          const int kb     = k - 1 - qb * dkq;
+          const double bk  = ub[((qb * dip + i) * djq + j) * dkq + kb];
+          const double apb = ak + bk;
+          const double amb = ak - bk;
+          double wa, wb;
+          sincospi(double(k) / double(nk + nk), &wb, &wa);
+          ua[ij + k]      = wa * amb + wb * apb;
+          ua[ij + nk - k] = wa * apb - wb * amb;
+        }
+      });
+  GPU_Error_Check(cufftExecD2Z(d2zk_, ua, uc));
+  const double divN = 1.0 / (8.0 * double(ni) * double(nj) * double(nk));
   gpuFor(
-    dip,djq,nk/2+1,
-    GPU_LAMBDA(const int i, const int j, const int k) {
-      if (k == 0) {
-        ua[(i*djq+j)*dk] = divN*ub[(i*djq+j)*nk2];
-      } else if (k+k == nk) {
-        const int pqa = (nk-1)/dk;
-        const int ka = nk-1-pqa*dk;
-        ua[((pqa*dip+i)*djq+j)*dk+ka] = -divN*ub[(i*djq+j)*nk2+nk];
-      } else {
-        const int kk = k+k;
-        const double ak = ub[(i*djq+j)*nk2+kk];
-        const double bk = ub[(i*djq+j)*nk2+kk+1];
-        const int pqa = (kk-1)/dk;
-        const int ka = kk-1-pqa*dk;
-        ua[((pqa*dip+i)*djq+j)*dk+ka] = divN*(bk-ak);
-        const int pqb = kk/dk;
-        const int kb = kk-pqb*dk;
-        ua[((pqb*dip+i)*djq+j)*dk+kb] = divN*(ak+bk);
-      }
-    });
-#ifndef MPI_GPU
-  CHECK(cudaMemcpy(ha_,ua,bytes_,cudaMemcpyDeviceToHost));
-  MPI_Alltoall(ha_,dip*djq*dk,MPI_DOUBLE,hb_,dip*djq*dk,MPI_DOUBLE,commK_);
-  CHECK(cudaMemcpyAsync(ub,hb_,bytes_,cudaMemcpyHostToDevice,0));
-#else
-  CHECK(cudaDeviceSynchronize());
-  MPI_Alltoall(ua,dip*djq*dk,MPI_DOUBLE,ub,dip*djq*dk,MPI_DOUBLE,commK_);
-#endif
+      dip, djq, nk / 2 + 1, GPU_LAMBDA(const int i, const int j, const int k) {
+        if (k == 0) {
+          ua[(i * djq + j) * dk] = divN * ub[(i * djq + j) * nk2];
+        } else if (k + k == nk) {
+          const int pqa                             = (nk - 1) / dk;
+          const int ka                              = nk - 1 - pqa * dk;
+          ua[((pqa * dip + i) * djq + j) * dk + ka] = -divN * ub[(i * djq + j) * nk2 + nk];
+        } else {
+          const int kk                              = k + k;
+          const double ak                           = ub[(i * djq + j) * nk2 + kk];
+          const double bk                           = ub[(i * djq + j) * nk2 + kk + 1];
+          const int pqa                             = (kk - 1) / dk;
+          const int ka                              = kk - 1 - pqa * dk;
+          ua[((pqa * dip + i) * djq + j) * dk + ka] = divN * (bk - ak);
+          const int pqb                             = kk / dk;
+          const int kb                              = kk - pqb * dk;
+          ua[((pqb * dip + i) * djq + j) * dk + kb] = divN * (ak + bk);
+        }
+      });
+  #ifndef MPI_GPU
+  GPU_Error_Check(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost));
+  MPI_Alltoall(ha_, dip * djq * dk, MPI_DOUBLE, hb_, dip * djq * dk, MPI_DOUBLE, commK_);
+  GPU_Error_Check(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0));
+  #else
+  GPU_Error_Check(cudaDeviceSynchronize());
+  MPI_Alltoall(ua, dip * djq * dk, MPI_DOUBLE, ub, dip * djq * dk, MPI_DOUBLE, commK_);
+  #endif
   gpuFor(
-    mp,dip,mq,djq,dk,
-    GPU_LAMBDA(const int p, const int i, const int q, const int j, const int k) {
-      const int iLo = p*dip;
-      const int jLo = q*djq;
-      if ((iLo+i < di) && (jLo+j < dj)) ua[((i+iLo)*dj+j+jLo)*dk+k] = ub[(((p*mq+q)*dip+i)*djq+j)*dk+k];
-    });
+      mp, dip, mq, djq, dk, GPU_LAMBDA(const int p, const int i, const int q, const int j, const int k) {
+        const int iLo = p * dip;
+        const int jLo = q * djq;
+        if ((iLo + i < di) && (jLo + j < dj)) {
+          ua[((i + iLo) * dj + j + jLo) * dk + k] = ub[(((p * mq + q) * dip + i) * djq + j) * dk + k];
+        }
+      });
 }
 
 #endif
diff --git a/src/gravity/paris/PoissonZero3DBlockedGPU.hpp b/src/gravity/paris/PoissonZero3DBlockedGPU.hpp
index 8d868b54d..0094f5b0d 100644
--- a/src/gravity/paris/PoissonZero3DBlockedGPU.hpp
+++ b/src/gravity/paris/PoissonZero3DBlockedGPU.hpp
@@ -1,28 +1,31 @@
 #pragma once
 
 #include <mpi.h>
+
 #include "../../utils/gpu.hpp"
 
-class PoissonZero3DBlockedGPU {
-  public:
-    PoissonZero3DBlockedGPU(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]);
-    ~PoissonZero3DBlockedGPU();
-    long bytes() const { return bytes_; }
-    void solve(long bytes, double *density, double *potential) const;
-  private:
-    double ddi_,ddj_,ddk_;
-    int idi_,idj_,idk_;
-    int mi_,mj_,mk_;
-    int ni_,nj_,nk_;
-    int mp_,mq_;
-    int idp_,idq_;
-    MPI_Comm commI_,commJ_,commK_;
-    int di_,dj_,dk_;
-    int dip_,djp_,djq_,dkq_;
-    int ni2_,nj2_,nk2_;
-    long bytes_;
-    cufftHandle d2zi_,d2zj_,d2zk_;
+class PoissonZero3DBlockedGPU
+{
+ public:
+  PoissonZero3DBlockedGPU(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]);
+  ~PoissonZero3DBlockedGPU();
+  long bytes() const { return bytes_; }
+  void solve(long bytes, double *density, double *potential) const;
+
+ private:
+  double ddi_, ddj_, ddk_;
+  int idi_, idj_, idk_;
+  int mi_, mj_, mk_;
+  int ni_, nj_, nk_;
+  int mp_, mq_;
+  int idp_, idq_;
+  MPI_Comm commI_, commJ_, commK_;
+  int di_, dj_, dk_;
+  int dip_, djp_, djq_, dkq_;
+  int ni2_, nj2_, nk2_;
+  long bytes_;
+  cufftHandle d2zi_, d2zj_, d2zk_;
 #ifndef MPI_GPU
-    double *ha_, *hb_;
+  double *ha_, *hb_;
 #endif
 };
diff --git a/src/gravity/paris/README.md b/src/gravity/paris/README.md
index a73664fa3..d019d5e1f 100644
--- a/src/gravity/paris/README.md
+++ b/src/gravity/paris/README.md
@@ -8,7 +8,7 @@ A 3D Poisson solver that expects periodic boundary conditions.
 
 *ParisPeriodic* calls the FFT filter provided by the *HenryPeriodic* class, where it provides a C++ lambda function that solves the Poisson equation in frequency space.
 It assumes fields in a 3D block distribution with no ghost cells.
-It is used by the Cholla class *Potential_Paris_3D* to solve Poisson problems with periodic boundary conditions.
+It is used by the Cholla class *PotentialParis3D* to solve Poisson problems with periodic boundary conditions.
 
 To use:
 - Construct a *ParisPeriodic* object using information about the global domain and local MPI task.
@@ -44,12 +44,12 @@ A 3D Poisson solver that expects zero-valued boundary conditions.
 
 *PoissonZero3DBlockedGPU* uses discrete sine transforms (DSTs) instead of Fourier transforms to enforce zero-valued, non-periodic boundary conditions.
 It is currently a monolithic class, not depenedent on a *Henry* class.
-It is used by the Cholla class *Potential_Paris_Galactic* to solve Poisson problems with non-zero, non-periodic, analytic boundary conditions.
+It is used by the Cholla class *PotentialParisGalactic* to solve Poisson problems with non-zero, non-periodic, analytic boundary conditions.
 
-*Potential_Paris_Galactic::Get_Potential()* uses *PoissonZero3DBlockedGPU::solve()* as follows.
+*PotentialParisGalactic::Get_Potential()* uses *PoissonZero3DBlockedGPU::solve()* as follows.
 - Subtract an analytic density from the input density, where the analytic density matches the input density at the domain boundaries.
 This results in a density with zero-valued boundaries.
-- Call *PoissonZero3DBlockedGPU::solve()* with this density with zero-valued boundaries. 
+- Call *PoissonZero3DBlockedGPU::solve()* with this density with zero-valued boundaries.
 - Add an analytic potential to the resulting potential, where the analytic potential is the solution to the Poisson equation for the analytic density that was subtracted from the input density.
 The resulting sum of potentials is the solution to the Poisson problem for the full input density.
 
diff --git a/src/gravity/potential_SOR_3D.cpp b/src/gravity/potential_SOR_3D.cpp
index a7a0b4d2f..0cffeb981 100644
--- a/src/gravity/potential_SOR_3D.cpp
+++ b/src/gravity/potential_SOR_3D.cpp
@@ -1,20 +1,22 @@
 #if defined(GRAVITY) && defined(SOR)
 
-#include "../gravity/potential_SOR_3D.h"
-#include "../io/io.h"
-#include <iostream>
-#include <cmath>
-#include "../grid/grid3D.h"
+  #include "../gravity/potential_SOR_3D.h"
 
-#ifdef MPI_CHOLLA
-#include "../mpi/mpi_routines.h"
-#endif
+  #include <cmath>
+  #include <iostream>
 
+  #include "../grid/grid3D.h"
+  #include "../io/io.h"
 
-Potential_SOR_3D::Potential_SOR_3D( void ){}
+  #ifdef MPI_CHOLLA
+    #include "../mpi/mpi_routines.h"
+  #endif
 
-void Potential_SOR_3D::Initialize( Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real){
+Potential_SOR_3D::Potential_SOR_3D(void) {}
 
+void Potential_SOR_3D::Initialize(Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz,
+                                  int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real)
+{
   Lbox_x = Lx;
   Lbox_y = Ly;
   Lbox_z = Lz;
@@ -33,13 +35,13 @@ void Potential_SOR_3D::Initialize( Real Lx, Real Ly, Real Lz, Real x_min, Real y
 
   n_ghost = N_GHOST_POTENTIAL;
 
-  nx_pot = nx_local + 2*n_ghost;
-  ny_pot = ny_local + 2*n_ghost;
-  nz_pot = nz_local + 2*n_ghost;
+  nx_pot = nx_local + 2 * n_ghost;
+  ny_pot = ny_local + 2 * n_ghost;
+  nz_pot = nz_local + 2 * n_ghost;
 
-  n_cells_local = nx_local*ny_local*nz_local;
-  n_cells_potential = nx_pot*ny_pot*nz_pot;
-  n_cells_total = nx_total*ny_total*nz_total;
+  n_cells_local     = nx_local * ny_local * nz_local;
+  n_cells_potential = nx_pot * ny_pot * nz_pot;
+  n_cells_total     = nx_total * ny_total * nz_total;
 
   n_ghost_transfer = 1;
 
@@ -51,114 +53,129 @@ void Potential_SOR_3D::Initialize( Real Lx, Real Ly, Real Lz, Real x_min, Real y
   size_buffer_x = n_ghost_transfer * ny_local * nz_local;
   size_buffer_y = n_ghost_transfer * nx_local * nz_local;
   size_buffer_z = n_ghost_transfer * nx_local * ny_local;
-  if ( size_buffer_x%2 !=0 ) chprintf( " SOR Warning: Buffer X not divisible by 2, Disable HALF_SIZE_BOUNDARIES \n");
-  else size_buffer_x /= 2;
-  if ( size_buffer_y%2 !=0 ) chprintf( " SOR Warning: Buffer Y not divisible by 2, Disable HALF_SIZE_BOUNDARIES \n");
-  else size_buffer_y /= 2;
-  if ( size_buffer_z%2 !=0 ) chprintf( " SOR Warning: Buffer Y not divisible by 2, Disable HALF_SIZE_BOUNDARIES \n");
-  else size_buffer_z /= 2;
+  if (size_buffer_x % 2 != 0)
+    chprintf(
+        " SOR Warning: Buffer X not divisible by 2, Disable "
+        "HALF_SIZE_BOUNDARIES \n");
+  else
+    size_buffer_x /= 2;
+  if (size_buffer_y % 2 != 0)
+    chprintf(
+        " SOR Warning: Buffer Y not divisible by 2, Disable "
+        "HALF_SIZE_BOUNDARIES \n");
+  else
+    size_buffer_y /= 2;
+  if (size_buffer_z % 2 != 0)
+    chprintf(
+        " SOR Warning: Buffer Y not divisible by 2, Disable "
+        "HALF_SIZE_BOUNDARIES \n");
+  else
+    size_buffer_z /= 2;
   #endif
 
-  //Flag to transfer Poisson Boundaries when calling Set_Boundaries
+  // Flag to transfer Poisson Boundaries when calling Set_Boundaries
   TRANSFER_POISSON_BOUNDARIES = false;
 
+  chprintf(" Using Poisson Solver: SOR\n");
+  chprintf("  SOR: L[ %f %f %f ] N[ %d %d %d ] dx[ %f %f %f ]\n", Lbox_x, Lbox_y, Lbox_z, nx_local, ny_local, nz_local,
+           dx, dy, dz);
 
-  chprintf( " Using Poisson Solver: SOR\n");
-  chprintf( "  SOR: L[ %f %f %f ] N[ %d %d %d ] dx[ %f %f %f ]\n", Lbox_x, Lbox_y, Lbox_z, nx_local, ny_local, nz_local, dx, dy, dz );
-
-  chprintf( "  SOR: Allocating memory...\n");
+  chprintf("  SOR: Allocating memory...\n");
   AllocateMemory_CPU();
   AllocateMemory_GPU();
 
   potential_initialized = false;
-
 }
 
-
-void Potential_SOR_3D::AllocateMemory_CPU( void ){
-  F.output_h = (Real *) malloc(n_cells_local*sizeof(Real));
-  F.converged_h = (bool *) malloc(sizeof(bool));
-
+void Potential_SOR_3D::AllocateMemory_CPU(void)
+{
+  F.output_h    = (Real *)malloc(n_cells_local * sizeof(Real));
+  F.converged_h = (bool *)malloc(sizeof(bool));
 }
 
-
-void Potential_SOR_3D::AllocateMemory_GPU( void ){
-
-  Allocate_Array_GPU_Real( &F.input_d, n_cells_local );
-  Allocate_Array_GPU_Real( &F.density_d, n_cells_local );
-  Allocate_Array_GPU_Real( &F.potential_d, n_cells_potential );
-  Allocate_Array_GPU_bool( &F.converged_d, 1 );
-  Allocate_Array_GPU_Real( &F.boundaries_buffer_x0_d, size_buffer_x);
-  Allocate_Array_GPU_Real( &F.boundaries_buffer_x1_d, size_buffer_x);
-  Allocate_Array_GPU_Real( &F.boundaries_buffer_y0_d, size_buffer_y);
-  Allocate_Array_GPU_Real( &F.boundaries_buffer_y1_d, size_buffer_y);
-  Allocate_Array_GPU_Real( &F.boundaries_buffer_z0_d, size_buffer_z);
-  Allocate_Array_GPU_Real( &F.boundaries_buffer_z1_d, size_buffer_z);
+void Potential_SOR_3D::AllocateMemory_GPU(void)
+{
+  Allocate_Array_GPU_Real(&F.input_d, n_cells_local);
+  Allocate_Array_GPU_Real(&F.density_d, n_cells_local);
+  Allocate_Array_GPU_Real(&F.potential_d, n_cells_potential);
+  Allocate_Array_GPU_bool(&F.converged_d, 1);
+  Allocate_Array_GPU_Real(&F.boundaries_buffer_x0_d, size_buffer_x);
+  Allocate_Array_GPU_Real(&F.boundaries_buffer_x1_d, size_buffer_x);
+  Allocate_Array_GPU_Real(&F.boundaries_buffer_y0_d, size_buffer_y);
+  Allocate_Array_GPU_Real(&F.boundaries_buffer_y1_d, size_buffer_y);
+  Allocate_Array_GPU_Real(&F.boundaries_buffer_z0_d, size_buffer_z);
+  Allocate_Array_GPU_Real(&F.boundaries_buffer_z1_d, size_buffer_z);
 
   #ifdef MPI_CHOLLA
-  Allocate_Array_GPU_Real( &F.recv_boundaries_buffer_x0_d, size_buffer_x);
-  Allocate_Array_GPU_Real( &F.recv_boundaries_buffer_x1_d, size_buffer_x);
-  Allocate_Array_GPU_Real( &F.recv_boundaries_buffer_y0_d, size_buffer_y);
-  Allocate_Array_GPU_Real( &F.recv_boundaries_buffer_y1_d, size_buffer_y);
-  Allocate_Array_GPU_Real( &F.recv_boundaries_buffer_z0_d, size_buffer_z);
-  Allocate_Array_GPU_Real( &F.recv_boundaries_buffer_z1_d, size_buffer_z);
+  Allocate_Array_GPU_Real(&F.recv_boundaries_buffer_x0_d, size_buffer_x);
+  Allocate_Array_GPU_Real(&F.recv_boundaries_buffer_x1_d, size_buffer_x);
+  Allocate_Array_GPU_Real(&F.recv_boundaries_buffer_y0_d, size_buffer_y);
+  Allocate_Array_GPU_Real(&F.recv_boundaries_buffer_y1_d, size_buffer_y);
+  Allocate_Array_GPU_Real(&F.recv_boundaries_buffer_z0_d, size_buffer_z);
+  Allocate_Array_GPU_Real(&F.recv_boundaries_buffer_z1_d, size_buffer_z);
   #endif
 
   #ifdef GRAV_ISOLATED_BOUNDARY_X
-  Allocate_Array_GPU_Real( &F.boundary_isolated_x0_d, n_ghost*ny_local*nz_local );
-  Allocate_Array_GPU_Real( &F.boundary_isolated_x1_d, n_ghost*ny_local*nz_local );
+  Allocate_Array_GPU_Real(&F.boundary_isolated_x0_d, n_ghost * ny_local * nz_local);
+  Allocate_Array_GPU_Real(&F.boundary_isolated_x1_d, n_ghost * ny_local * nz_local);
   #endif
   #ifdef GRAV_ISOLATED_BOUNDARY_X
-  Allocate_Array_GPU_Real( &F.boundary_isolated_y0_d, n_ghost*nx_local*nz_local );
-  Allocate_Array_GPU_Real( &F.boundary_isolated_y1_d, n_ghost*nx_local*nz_local );
+  Allocate_Array_GPU_Real(&F.boundary_isolated_y0_d, n_ghost * nx_local * nz_local);
+  Allocate_Array_GPU_Real(&F.boundary_isolated_y1_d, n_ghost * nx_local * nz_local);
   #endif
   #ifdef GRAV_ISOLATED_BOUNDARY_Z
-  Allocate_Array_GPU_Real( &F.boundary_isolated_z0_d, n_ghost*nx_local*ny_local );
-  Allocate_Array_GPU_Real( &F.boundary_isolated_z1_d, n_ghost*nx_local*ny_local );
+  Allocate_Array_GPU_Real(&F.boundary_isolated_z0_d, n_ghost * nx_local * ny_local);
+  Allocate_Array_GPU_Real(&F.boundary_isolated_z1_d, n_ghost * nx_local * ny_local);
   #endif
-
 }
 
-void Potential_SOR_3D::Copy_Input_And_Initialize( Real *input_density, const Real *const input_potential, Real Grav_Constant, Real dens_avrg, Real current_a ){
-  Copy_Input( n_cells_local, F.input_d, input_density, Grav_Constant, dens_avrg, current_a );
-
-  if ( !potential_initialized ){
-    chprintf( "SOR: Initializing  Potential \n");
-    CHECK( cudaMemcpy( F.potential_d, input_potential, n_cells_potential*sizeof(Real), cudaMemcpyHostToDevice ) );
-    //Initialize_Potential( nx_local, ny_local, nz_local, n_ghost, F.potential_d, F.density_d );
+void Potential_SOR_3D::Copy_Input_And_Initialize(Real *input_density, const Real *const input_potential,
+                                                 Real Grav_Constant, Real dens_avrg, Real current_a)
+{
+  Copy_Input(n_cells_local, F.input_d, input_density, Grav_Constant, dens_avrg, current_a);
+
+  if (!potential_initialized) {
+    chprintf("SOR: Initializing  Potential \n");
+    GPU_Error_Check(
+        cudaMemcpy(F.potential_d, input_potential, n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice));
+    // Initialize_Potential( nx_local, ny_local, nz_local, n_ghost,
+    // F.potential_d, F.density_d );
     potential_initialized = true;
   }
 }
 
-
-void Potential_SOR_3D::Poisson_Partial_Iteration( int n_step, Real omega, Real epsilon ){
-  if (n_step == 0 ) Poisson_iteration_Patial_1( n_cells_local, nx_local, ny_local, nz_local, n_ghost, dx, dy, dz, omega, epsilon, F.density_d, F.potential_d, F.converged_h, F.converged_d );
-  if (n_step == 1 ) Poisson_iteration_Patial_2( n_cells_local, nx_local, ny_local, nz_local, n_ghost, dx, dy, dz, omega, epsilon, F.density_d, F.potential_d, F.converged_h, F.converged_d );
+void Potential_SOR_3D::Poisson_Partial_Iteration(int n_step, Real omega, Real epsilon)
+{
+  if (n_step == 0)
+    Poisson_iteration_Patial_1(n_cells_local, nx_local, ny_local, nz_local, n_ghost, dx, dy, dz, omega, epsilon,
+                               F.density_d, F.potential_d, F.converged_h, F.converged_d);
+  if (n_step == 1)
+    Poisson_iteration_Patial_2(n_cells_local, nx_local, ny_local, nz_local, n_ghost, dx, dy, dz, omega, epsilon,
+                               F.density_d, F.potential_d, F.converged_h, F.converged_d);
 }
 
-
-void Grid3D::Get_Potential_SOR( Real Grav_Constant, Real dens_avrg, Real current_a, struct parameters *P ){
-
+void Grid3D::Get_Potential_SOR(Real Grav_Constant, Real dens_avrg, Real current_a, struct Parameters *P)
+{
   #ifdef TIME_SOR
   Real time_start, time_end, time;
-  time_start = get_time();
+  time_start = Get_Time();
   #endif
 
-  Grav.Poisson_solver.Copy_Input_And_Initialize( Grav.F.density_h, Grav.F.potential_h, Grav_Constant, dens_avrg, current_a );
-
-  //Set Isolated Boundary Conditions
-  Grav.Copy_Isolated_Boundaries_To_GPU( P );
-  Grav.Poisson_solver.Set_Isolated_Boundary_Conditions( Grav.boundary_flags, P );
+  Grav.Poisson_solver.Copy_Input_And_Initialize(Grav.F.density_h, Grav.F.potential_h, Grav_Constant, dens_avrg,
+                                                current_a);
 
+  // Set Isolated Boundary Conditions
+  Grav.Copy_Isolated_Boundaries_To_GPU(P);
+  Grav.Poisson_solver.Set_Isolated_Boundary_Conditions(Grav.boundary_flags, P);
 
   Real epsilon = 1e-4;
   int max_iter = 10000000;
-  int n_iter = 0;
+  int n_iter   = 0;
 
   Grav.Poisson_solver.F.converged_h[0] = 0;
 
   // For Diriclet Boundaries
-  Real omega = 2. / ( 1 + M_PI / Grav.Poisson_solver.nx_total  );
+  Real omega = 2. / (1 + M_PI / Grav.Poisson_solver.nx_total);
 
   // For Periodic Boundaries
   // Real omega = 2. / ( 1 + 2*M_PI / nx_total  );
@@ -166,374 +183,398 @@ void Grid3D::Get_Potential_SOR( Real Grav_Constant, Real dens_avrg, Real current
 
   bool set_boundaries;
 
-  //Number of iterations in between boundary transfers
+  // Number of iterations in between boundary transfers
   int n_iter_per_boundaries_transfer = 1;
 
-
   // Iterate to solve Poisson equation
-  while ( Grav.Poisson_solver.F.converged_h[0] == 0 ) {
-
+  while (Grav.Poisson_solver.F.converged_h[0] == 0) {
     set_boundaries = false;
-    if ( n_iter % n_iter_per_boundaries_transfer == 0 ) set_boundaries = true;
+    if (n_iter % n_iter_per_boundaries_transfer == 0) set_boundaries = true;
 
     // First Partial Iteration
     Grav.Poisson_solver.iteration_parity = 0;
-    if ( set_boundaries ){
+    if (set_boundaries) {
       Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES = true;
-      Set_Boundary_Conditions( *P );
+      Set_Boundary_Conditions(*P);
       Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES = false;
     }
-    Grav.Poisson_solver.Poisson_Partial_Iteration( Grav.Poisson_solver.iteration_parity, omega, epsilon );
-
+    Grav.Poisson_solver.Poisson_Partial_Iteration(Grav.Poisson_solver.iteration_parity, omega, epsilon);
 
     // Second Partial Iteration
     Grav.Poisson_solver.iteration_parity = 1;
-    if ( set_boundaries ){
+    if (set_boundaries) {
       Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES = true;
-      Set_Boundary_Conditions( *P );
+      Set_Boundary_Conditions(*P);
       Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES = false;
     }
-    Grav.Poisson_solver.Poisson_Partial_Iteration( Grav.Poisson_solver.iteration_parity, omega, epsilon );
+    Grav.Poisson_solver.Poisson_Partial_Iteration(Grav.Poisson_solver.iteration_parity, omega, epsilon);
 
-    // Get convergence state
-    #ifdef MPI_CHOLLA
-    Grav.Poisson_solver.F.converged_h[0] = Grav.Poisson_solver.Get_Global_Converged( Grav.Poisson_solver.F.converged_h[0] );
-    #endif
+  // Get convergence state
+  #ifdef MPI_CHOLLA
+    Grav.Poisson_solver.F.converged_h[0] =
+        Grav.Poisson_solver.Get_Global_Converged(Grav.Poisson_solver.F.converged_h[0]);
+  #endif
 
-    //Only aloow to connverge after the boundaries have been transfere to avoid false convergence in the boundaries.
-    if ( set_boundaries == false ) Grav.Poisson_solver.F.converged_h[0] = 0;
+    // Only aloow to connverge after the boundaries have been transfere to avoid
+    // false convergence in the boundaries.
+    if (set_boundaries == false) Grav.Poisson_solver.F.converged_h[0] = 0;
 
     n_iter += 1;
-    if ( n_iter == max_iter ) break;
+    if (n_iter == max_iter) break;
   }
 
-  if ( n_iter == max_iter ) chprintf(" SOR: No convergence in %d iterations \n", n_iter);
-  else chprintf(" SOR: Converged in %d iterations \n", n_iter);
+  if (n_iter == max_iter)
+    chprintf(" SOR: No convergence in %d iterations \n", n_iter);
+  else
+    chprintf(" SOR: Converged in %d iterations \n", n_iter);
 
-  Grav.Poisson_solver.Copy_Output( Grav.F.potential_h );
+  Grav.Poisson_solver.Copy_Output(Grav.F.potential_h);
 
   #ifdef TIME_SOR
-  #ifdef MPI_CHOLLA
+    #ifdef MPI_CHOLLA
   MPI_Barrier(world);
+    #endif
+  time_end = Get_Time();
+  time     = (time_end - time_start);
+  chprintf(" SOR: Time = %f  seg\n", time);
   #endif
-  time_end = get_time();
-  time = (time_end - time_start);
-  chprintf( " SOR: Time = %f  seg\n", time );
-  #endif
-
-
 }
 
-void Grav3D::Copy_Isolated_Boundaries_To_GPU( struct parameters *P ){
-
-  if ( P->xl_bcnd != 3 && P->xu_bcnd != 3 && P->yl_bcnd != 3 && P->yu_bcnd != 3 && P->zl_bcnd != 3 && P->zu_bcnd != 3 ) return;
+void Grav3D::Copy_Isolated_Boundaries_To_GPU(struct Parameters *P)
+{
+  if (P->xl_bcnd != 3 && P->xu_bcnd != 3 && P->yl_bcnd != 3 && P->yu_bcnd != 3 && P->zl_bcnd != 3 && P->zu_bcnd != 3)
+    return;
 
   // chprintf( " Copying Isolated Boundaries \n");
-  if ( boundary_flags[0] == 3 ) Copy_Isolated_Boundary_To_GPU_buffer( F.pot_boundary_x0, Poisson_solver.F.boundary_isolated_x0_d,  Poisson_solver.n_ghost*ny_local*nz_local );
-  if ( boundary_flags[1] == 3 ) Copy_Isolated_Boundary_To_GPU_buffer( F.pot_boundary_x1, Poisson_solver.F.boundary_isolated_x1_d,  Poisson_solver.n_ghost*ny_local*nz_local );
-  if ( boundary_flags[2] == 3 ) Copy_Isolated_Boundary_To_GPU_buffer( F.pot_boundary_y0, Poisson_solver.F.boundary_isolated_y0_d,  Poisson_solver.n_ghost*nx_local*nz_local );
-  if ( boundary_flags[3] == 3 ) Copy_Isolated_Boundary_To_GPU_buffer( F.pot_boundary_y1, Poisson_solver.F.boundary_isolated_y1_d,  Poisson_solver.n_ghost*nx_local*nz_local );
-  if ( boundary_flags[4] == 3 ) Copy_Isolated_Boundary_To_GPU_buffer( F.pot_boundary_z0, Poisson_solver.F.boundary_isolated_z0_d,  Poisson_solver.n_ghost*nx_local*ny_local );
-  if ( boundary_flags[5] == 3 ) Copy_Isolated_Boundary_To_GPU_buffer( F.pot_boundary_z1, Poisson_solver.F.boundary_isolated_z1_d,  Poisson_solver.n_ghost*nx_local*ny_local );
-
-
+  if (boundary_flags[0] == 3)
+    Copy_Isolated_Boundary_To_GPU_buffer(F.pot_boundary_x0, Poisson_solver.F.boundary_isolated_x0_d,
+                                         Poisson_solver.n_ghost * ny_local * nz_local);
+  if (boundary_flags[1] == 3)
+    Copy_Isolated_Boundary_To_GPU_buffer(F.pot_boundary_x1, Poisson_solver.F.boundary_isolated_x1_d,
+                                         Poisson_solver.n_ghost * ny_local * nz_local);
+  if (boundary_flags[2] == 3)
+    Copy_Isolated_Boundary_To_GPU_buffer(F.pot_boundary_y0, Poisson_solver.F.boundary_isolated_y0_d,
+                                         Poisson_solver.n_ghost * nx_local * nz_local);
+  if (boundary_flags[3] == 3)
+    Copy_Isolated_Boundary_To_GPU_buffer(F.pot_boundary_y1, Poisson_solver.F.boundary_isolated_y1_d,
+                                         Poisson_solver.n_ghost * nx_local * nz_local);
+  if (boundary_flags[4] == 3)
+    Copy_Isolated_Boundary_To_GPU_buffer(F.pot_boundary_z0, Poisson_solver.F.boundary_isolated_z0_d,
+                                         Poisson_solver.n_ghost * nx_local * ny_local);
+  if (boundary_flags[5] == 3)
+    Copy_Isolated_Boundary_To_GPU_buffer(F.pot_boundary_z1, Poisson_solver.F.boundary_isolated_z1_d,
+                                         Poisson_solver.n_ghost * nx_local * ny_local);
 }
 
-void Potential_SOR_3D::Set_Isolated_Boundary_Conditions( int *boundary_flags, struct parameters *P ){
-
-
-  if ( P->xl_bcnd != 3 && P->xu_bcnd != 3 && P->yl_bcnd != 3 && P->yu_bcnd != 3 && P->zl_bcnd != 3 && P->zu_bcnd != 3 ) return;
-
-  chprintf( " Setting Isolated Boundaries \n");
-  if ( boundary_flags[0] == 3 ) Set_Isolated_Boundary_GPU( 0, 0,  F.boundary_isolated_x0_d );
-  if ( boundary_flags[1] == 3 ) Set_Isolated_Boundary_GPU( 0, 1,  F.boundary_isolated_x1_d );
-  if ( boundary_flags[2] == 3 ) Set_Isolated_Boundary_GPU( 1, 0,  F.boundary_isolated_y0_d );
-  if ( boundary_flags[3] == 3 ) Set_Isolated_Boundary_GPU( 1, 1,  F.boundary_isolated_y1_d );
-  if ( boundary_flags[4] == 3 ) Set_Isolated_Boundary_GPU( 2, 0,  F.boundary_isolated_z0_d );
-  if ( boundary_flags[5] == 3 ) Set_Isolated_Boundary_GPU( 2, 1,  F.boundary_isolated_z1_d );
-
+void Potential_SOR_3D::Set_Isolated_Boundary_Conditions(int *boundary_flags, struct Parameters *P)
+{
+  if (P->xl_bcnd != 3 && P->xu_bcnd != 3 && P->yl_bcnd != 3 && P->yu_bcnd != 3 && P->zl_bcnd != 3 && P->zu_bcnd != 3)
+    return;
+
+  chprintf(" Setting Isolated Boundaries \n");
+  if (boundary_flags[0] == 3) Set_Isolated_Boundary_GPU(0, 0, F.boundary_isolated_x0_d);
+  if (boundary_flags[1] == 3) Set_Isolated_Boundary_GPU(0, 1, F.boundary_isolated_x1_d);
+  if (boundary_flags[2] == 3) Set_Isolated_Boundary_GPU(1, 0, F.boundary_isolated_y0_d);
+  if (boundary_flags[3] == 3) Set_Isolated_Boundary_GPU(1, 1, F.boundary_isolated_y1_d);
+  if (boundary_flags[4] == 3) Set_Isolated_Boundary_GPU(2, 0, F.boundary_isolated_z0_d);
+  if (boundary_flags[5] == 3) Set_Isolated_Boundary_GPU(2, 1, F.boundary_isolated_z1_d);
 }
 
-
-
-
-void Potential_SOR_3D::Copy_Poisson_Boundary_Periodic( int direction, int side ){
-
+void Potential_SOR_3D::Copy_Poisson_Boundary_Periodic(int direction, int side)
+{
   Real *boundaries_buffer;
 
-  if( direction == 0 ){
-    if ( side == 0 ) boundaries_buffer = F.boundaries_buffer_x0_d;
-    if ( side == 1 ) boundaries_buffer = F.boundaries_buffer_x1_d;
+  if (direction == 0) {
+    if (side == 0) boundaries_buffer = F.boundaries_buffer_x0_d;
+    if (side == 1) boundaries_buffer = F.boundaries_buffer_x1_d;
   }
-  if( direction == 1 ){
-    if ( side == 0 ) boundaries_buffer = F.boundaries_buffer_y0_d;
-    if ( side == 1 ) boundaries_buffer = F.boundaries_buffer_y1_d;
+  if (direction == 1) {
+    if (side == 0) boundaries_buffer = F.boundaries_buffer_y0_d;
+    if (side == 1) boundaries_buffer = F.boundaries_buffer_y1_d;
   }
-  if( direction == 2 ){
-    if ( side == 0 ) boundaries_buffer = F.boundaries_buffer_z0_d;
-    if ( side == 1 ) boundaries_buffer = F.boundaries_buffer_z1_d;
+  if (direction == 2) {
+    if (side == 0) boundaries_buffer = F.boundaries_buffer_z0_d;
+    if (side == 1) boundaries_buffer = F.boundaries_buffer_z1_d;
   }
 
   int side_load, side_unload;
-  side_load = side;
-  side_unload = ( side_load + 1 ) % 2;
-
-  Load_Transfer_Buffer_GPU( direction, side_load, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, boundaries_buffer  );
-  Unload_Transfer_Buffer_GPU( direction, side_unload, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, boundaries_buffer  );
+  side_load   = side;
+  side_unload = (side_load + 1) % 2;
 
+  Load_Transfer_Buffer_GPU(direction, side_load, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                           boundaries_buffer);
+  Unload_Transfer_Buffer_GPU(direction, side_unload, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost,
+                             F.potential_d, boundaries_buffer);
 }
 
-
-void Potential_SOR_3D::FreeMemory_GPU( void ){
-
-  Free_Array_GPU_Real( F.input_d );
-  Free_Array_GPU_Real( F.density_d );
-  Free_Array_GPU_Real( F.potential_d );
-  Free_Array_GPU_Real( F.boundaries_buffer_x0_d );
-  Free_Array_GPU_Real( F.boundaries_buffer_x1_d );
-  Free_Array_GPU_Real( F.boundaries_buffer_y0_d );
-  Free_Array_GPU_Real( F.boundaries_buffer_y1_d );
-  Free_Array_GPU_Real( F.boundaries_buffer_z0_d );
-  Free_Array_GPU_Real( F.boundaries_buffer_z1_d );
+void Potential_SOR_3D::FreeMemory_GPU(void)
+{
+  Free_Array_GPU_Real(F.input_d);
+  Free_Array_GPU_Real(F.density_d);
+  Free_Array_GPU_Real(F.potential_d);
+  Free_Array_GPU_Real(F.boundaries_buffer_x0_d);
+  Free_Array_GPU_Real(F.boundaries_buffer_x1_d);
+  Free_Array_GPU_Real(F.boundaries_buffer_y0_d);
+  Free_Array_GPU_Real(F.boundaries_buffer_y1_d);
+  Free_Array_GPU_Real(F.boundaries_buffer_z0_d);
+  Free_Array_GPU_Real(F.boundaries_buffer_z1_d);
 
   #ifdef MPI_CHOLLA
-  Free_Array_GPU_Real( F.recv_boundaries_buffer_x0_d );
-  Free_Array_GPU_Real( F.recv_boundaries_buffer_x1_d );
-  Free_Array_GPU_Real( F.recv_boundaries_buffer_y0_d );
-  Free_Array_GPU_Real( F.recv_boundaries_buffer_y1_d );
-  Free_Array_GPU_Real( F.recv_boundaries_buffer_z0_d );
-  Free_Array_GPU_Real( F.recv_boundaries_buffer_z1_d );
+  Free_Array_GPU_Real(F.recv_boundaries_buffer_x0_d);
+  Free_Array_GPU_Real(F.recv_boundaries_buffer_x1_d);
+  Free_Array_GPU_Real(F.recv_boundaries_buffer_y0_d);
+  Free_Array_GPU_Real(F.recv_boundaries_buffer_y1_d);
+  Free_Array_GPU_Real(F.recv_boundaries_buffer_z0_d);
+  Free_Array_GPU_Real(F.recv_boundaries_buffer_z1_d);
   #endif
 
   #ifdef GRAV_ISOLATED_BOUNDARY_Z
-  Free_Array_GPU_Real( F.boundary_isolated_x0_d );
-  Free_Array_GPU_Real( F.boundary_isolated_x1_d );
+  Free_Array_GPU_Real(F.boundary_isolated_x0_d);
+  Free_Array_GPU_Real(F.boundary_isolated_x1_d);
   #endif
   #ifdef GRAV_ISOLATED_BOUNDARY_Y
-  Free_Array_GPU_Real( F.boundary_isolated_y0_d );
-  Free_Array_GPU_Real( F.boundary_isolated_y1_d );
+  Free_Array_GPU_Real(F.boundary_isolated_y0_d);
+  Free_Array_GPU_Real(F.boundary_isolated_y1_d);
   #endif
   #ifdef GRAV_ISOLATED_BOUNDARY_Z
-  Free_Array_GPU_Real( F.boundary_isolated_z0_d );
-  Free_Array_GPU_Real( F.boundary_isolated_z1_d );
+  Free_Array_GPU_Real(F.boundary_isolated_z0_d);
+  Free_Array_GPU_Real(F.boundary_isolated_z1_d);
   #endif
-
 }
 
-
-void Potential_SOR_3D::Reset( void ){
-  free( F.output_h );
+void Potential_SOR_3D::Reset(void)
+{
+  free(F.output_h);
   FreeMemory_GPU();
 }
 
+  #ifdef MPI_CHOLLA
 
-
-#ifdef MPI_CHOLLA
-
-int Grid3D::Load_Poisson_Boundary_To_Buffer( int direction, int side, Real *buffer_host  ){
-
+int Grid3D::Load_Poisson_Boundary_To_Buffer(int direction, int side, Real *buffer_host)
+{
   int size_buffer;
 
-  if ( direction == 0 ) size_buffer = Grav.Poisson_solver.size_buffer_x;
-  if ( direction == 1 ) size_buffer = Grav.Poisson_solver.size_buffer_y;
-  if ( direction == 2 ) size_buffer = Grav.Poisson_solver.size_buffer_z;
-
+  if (direction == 0) size_buffer = Grav.Poisson_solver.size_buffer_x;
+  if (direction == 1) size_buffer = Grav.Poisson_solver.size_buffer_y;
+  if (direction == 2) size_buffer = Grav.Poisson_solver.size_buffer_z;
 
-  //Load the transfer buffer in the GPU
-  if ( direction == 0 ){
-    if ( side == 0 ) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_x0();
-    if ( side == 1 ) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_x1();
+  // Load the transfer buffer in the GPU
+  if (direction == 0) {
+    if (side == 0) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_x0();
+    if (side == 1) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_x1();
   }
-  if ( direction == 1 ){
-    if ( side == 0 ) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_y0();
-    if ( side == 1 ) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_y1();
+  if (direction == 1) {
+    if (side == 0) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_y0();
+    if (side == 1) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_y1();
   }
-  if ( direction == 2 ){
-    if ( side == 0 ) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_z0();
-    if ( side == 1 ) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_z1();
+  if (direction == 2) {
+    if (side == 0) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_z0();
+    if (side == 1) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_z1();
   }
 
   // Copy the device_buffer to the host_buffer
   Real *buffer_dev;
-  if ( direction == 0 ){
-    if ( side == 0 ) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_x0_d;
-    if ( side == 1 ) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_x1_d;
+  if (direction == 0) {
+    if (side == 0) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_x0_d;
+    if (side == 1) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_x1_d;
   }
-  if ( direction == 1 ){
-    if ( side == 0 ) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_y0_d;
-    if ( side == 1 ) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_y1_d;
+  if (direction == 1) {
+    if (side == 0) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_y0_d;
+    if (side == 1) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_y1_d;
   }
-  if ( direction == 2 ){
-    if ( side == 0 ) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_z0_d;
-    if ( side == 1 ) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_z1_d;
+  if (direction == 2) {
+    if (side == 0) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_z0_d;
+    if (side == 1) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_z1_d;
   }
 
-  Grav.Poisson_solver.Copy_Transfer_Buffer_To_Host( size_buffer, buffer_host, buffer_dev );
-
+  Grav.Poisson_solver.Copy_Transfer_Buffer_To_Host(size_buffer, buffer_host, buffer_dev);
 
   return size_buffer;
 }
 
-
-void Grid3D::Unload_Poisson_Boundary_From_Buffer( int direction, int side, Real *buffer_host  ){
-
+void Grid3D::Unload_Poisson_Boundary_From_Buffer(int direction, int side, Real *buffer_host)
+{
   int size_buffer;
 
-  if ( direction == 0 ) size_buffer = Grav.Poisson_solver.size_buffer_x;
-  if ( direction == 1 ) size_buffer = Grav.Poisson_solver.size_buffer_y;
-  if ( direction == 2 ) size_buffer = Grav.Poisson_solver.size_buffer_z;
-
+  if (direction == 0) size_buffer = Grav.Poisson_solver.size_buffer_x;
+  if (direction == 1) size_buffer = Grav.Poisson_solver.size_buffer_y;
+  if (direction == 2) size_buffer = Grav.Poisson_solver.size_buffer_z;
 
   // Copy the host_buffer to the device_buffer
   Real *buffer_dev;
-  if ( direction == 0 ){
-    if ( side == 0 ) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_x0_d;
-    if ( side == 1 ) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_x1_d;
+  if (direction == 0) {
+    if (side == 0) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_x0_d;
+    if (side == 1) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_x1_d;
   }
-  if ( direction == 1 ){
-    if ( side == 0 ) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_y0_d;
-    if ( side == 1 ) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_y1_d;
+  if (direction == 1) {
+    if (side == 0) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_y0_d;
+    if (side == 1) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_y1_d;
   }
-  if ( direction == 2 ){
-    if ( side == 0 ) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_z0_d;
-    if ( side == 1 ) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_z1_d;
+  if (direction == 2) {
+    if (side == 0) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_z0_d;
+    if (side == 1) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_z1_d;
   }
 
-  Grav.Poisson_solver.Copy_Transfer_Buffer_To_Device( size_buffer, buffer_host, buffer_dev );
+  Grav.Poisson_solver.Copy_Transfer_Buffer_To_Device(size_buffer, buffer_host, buffer_dev);
 
-  //Unload the transfer buffer in the GPU
-  if ( direction == 0 ){
-    if ( side == 0 ) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_x0();
-    if ( side == 1 ) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_x1();
+  // Unload the transfer buffer in the GPU
+  if (direction == 0) {
+    if (side == 0) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_x0();
+    if (side == 1) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_x1();
   }
-  if ( direction == 1 ){
-    if ( side == 0 ) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_y0();
-    if ( side == 1 ) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_y1();
+  if (direction == 1) {
+    if (side == 0) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_y0();
+    if (side == 1) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_y1();
   }
-  if ( direction == 2 ){
-    if ( side == 0 ) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_z0();
-    if ( side == 1 ) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_z1();
+  if (direction == 2) {
+    if (side == 0) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_z0();
+    if (side == 1) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_z1();
   }
-
 }
 
-
-
-void Potential_SOR_3D::Load_Transfer_Buffer_GPU_x0(){
-  #ifdef HALF_SIZE_BOUNDARIES
-  Load_Transfer_Buffer_Half_GPU( 0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_x0_d  );
-  #else
-  Load_Transfer_Buffer_GPU( 0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_x0_d  );
-  #endif
+void Potential_SOR_3D::Load_Transfer_Buffer_GPU_x0()
+{
+    #ifdef HALF_SIZE_BOUNDARIES
+  Load_Transfer_Buffer_Half_GPU(0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                                F.boundaries_buffer_x0_d);
+    #else
+  Load_Transfer_Buffer_GPU(0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                           F.boundaries_buffer_x0_d);
+    #endif
 }
 
-void Potential_SOR_3D::Load_Transfer_Buffer_GPU_x1(){
-  #ifdef HALF_SIZE_BOUNDARIES
-  Load_Transfer_Buffer_Half_GPU( 0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_x1_d  );
-  #else
-  Load_Transfer_Buffer_GPU( 0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_x1_d  );
-  #endif
+void Potential_SOR_3D::Load_Transfer_Buffer_GPU_x1()
+{
+    #ifdef HALF_SIZE_BOUNDARIES
+  Load_Transfer_Buffer_Half_GPU(0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                                F.boundaries_buffer_x1_d);
+    #else
+  Load_Transfer_Buffer_GPU(0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                           F.boundaries_buffer_x1_d);
+    #endif
 }
 
-void Potential_SOR_3D::Load_Transfer_Buffer_GPU_y0(){
-  #ifdef HALF_SIZE_BOUNDARIES
-  Load_Transfer_Buffer_Half_GPU( 1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_y0_d  );
-  #else
-  Load_Transfer_Buffer_GPU( 1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_y0_d  );
-  #endif
+void Potential_SOR_3D::Load_Transfer_Buffer_GPU_y0()
+{
+    #ifdef HALF_SIZE_BOUNDARIES
+  Load_Transfer_Buffer_Half_GPU(1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                                F.boundaries_buffer_y0_d);
+    #else
+  Load_Transfer_Buffer_GPU(1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                           F.boundaries_buffer_y0_d);
+    #endif
 }
 
-void Potential_SOR_3D::Load_Transfer_Buffer_GPU_y1(){
-  #ifdef HALF_SIZE_BOUNDARIES
-  Load_Transfer_Buffer_Half_GPU( 1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_y1_d  );
-  #else
-  Load_Transfer_Buffer_GPU( 1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_y1_d  );
-  #endif
+void Potential_SOR_3D::Load_Transfer_Buffer_GPU_y1()
+{
+    #ifdef HALF_SIZE_BOUNDARIES
+  Load_Transfer_Buffer_Half_GPU(1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                                F.boundaries_buffer_y1_d);
+    #else
+  Load_Transfer_Buffer_GPU(1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                           F.boundaries_buffer_y1_d);
+    #endif
 }
 
-void Potential_SOR_3D::Load_Transfer_Buffer_GPU_z0(){
-  #ifdef HALF_SIZE_BOUNDARIES
-  Load_Transfer_Buffer_Half_GPU( 2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_z0_d  );
-  #else
-  Load_Transfer_Buffer_GPU( 2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_z0_d  );
-  #endif
+void Potential_SOR_3D::Load_Transfer_Buffer_GPU_z0()
+{
+    #ifdef HALF_SIZE_BOUNDARIES
+  Load_Transfer_Buffer_Half_GPU(2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                                F.boundaries_buffer_z0_d);
+    #else
+  Load_Transfer_Buffer_GPU(2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                           F.boundaries_buffer_z0_d);
+    #endif
 }
 
-void Potential_SOR_3D::Load_Transfer_Buffer_GPU_z1(){
-  #ifdef HALF_SIZE_BOUNDARIES
-  Load_Transfer_Buffer_Half_GPU( 2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_z1_d  );
-  #else
-  Load_Transfer_Buffer_GPU( 2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_z1_d  );
-  #endif
+void Potential_SOR_3D::Load_Transfer_Buffer_GPU_z1()
+{
+    #ifdef HALF_SIZE_BOUNDARIES
+  Load_Transfer_Buffer_Half_GPU(2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                                F.boundaries_buffer_z1_d);
+    #else
+  Load_Transfer_Buffer_GPU(2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                           F.boundaries_buffer_z1_d);
+    #endif
 }
 
-
-void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_x0(){
-  #ifdef HALF_SIZE_BOUNDARIES
-  Unload_Transfer_Buffer_Half_GPU( 0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_x0_d  );
-  #else
-  Unload_Transfer_Buffer_GPU( 0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_x0_d  );
-  #endif
+void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_x0()
+{
+    #ifdef HALF_SIZE_BOUNDARIES
+  Unload_Transfer_Buffer_Half_GPU(0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                                  F.recv_boundaries_buffer_x0_d);
+    #else
+  Unload_Transfer_Buffer_GPU(0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                             F.recv_boundaries_buffer_x0_d);
+    #endif
 }
 
-void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_x1(){
-  #ifdef HALF_SIZE_BOUNDARIES
-  Unload_Transfer_Buffer_Half_GPU( 0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_x1_d  );
-  #else
-  Unload_Transfer_Buffer_GPU( 0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_x1_d  );
-  #endif
+void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_x1()
+{
+    #ifdef HALF_SIZE_BOUNDARIES
+  Unload_Transfer_Buffer_Half_GPU(0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                                  F.recv_boundaries_buffer_x1_d);
+    #else
+  Unload_Transfer_Buffer_GPU(0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                             F.recv_boundaries_buffer_x1_d);
+    #endif
 }
 
-void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_y0(){
-  #ifdef HALF_SIZE_BOUNDARIES
-  Unload_Transfer_Buffer_Half_GPU( 1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_y0_d  );
-  #else
-  Unload_Transfer_Buffer_GPU( 1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_y0_d  );
-  #endif
+void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_y0()
+{
+    #ifdef HALF_SIZE_BOUNDARIES
+  Unload_Transfer_Buffer_Half_GPU(1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                                  F.recv_boundaries_buffer_y0_d);
+    #else
+  Unload_Transfer_Buffer_GPU(1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                             F.recv_boundaries_buffer_y0_d);
+    #endif
 }
 
-void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_y1(){
-  #ifdef HALF_SIZE_BOUNDARIES
-  Unload_Transfer_Buffer_Half_GPU( 1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_y1_d  );
-  #else
-  Unload_Transfer_Buffer_GPU( 1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_y1_d  );
-  #endif
+void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_y1()
+{
+    #ifdef HALF_SIZE_BOUNDARIES
+  Unload_Transfer_Buffer_Half_GPU(1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                                  F.recv_boundaries_buffer_y1_d);
+    #else
+  Unload_Transfer_Buffer_GPU(1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                             F.recv_boundaries_buffer_y1_d);
+    #endif
 }
 
-void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_z0(){
-  #ifdef HALF_SIZE_BOUNDARIES
-  Unload_Transfer_Buffer_Half_GPU( 2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_z0_d  );
-  #else
-  Unload_Transfer_Buffer_GPU( 2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_z0_d  );
-  #endif
+void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_z0()
+{
+    #ifdef HALF_SIZE_BOUNDARIES
+  Unload_Transfer_Buffer_Half_GPU(2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                                  F.recv_boundaries_buffer_z0_d);
+    #else
+  Unload_Transfer_Buffer_GPU(2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                             F.recv_boundaries_buffer_z0_d);
+    #endif
 }
 
-void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_z1(){
-  #ifdef HALF_SIZE_BOUNDARIES
-  Unload_Transfer_Buffer_Half_GPU( 2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_z1_d  );
-  #else
-  Unload_Transfer_Buffer_GPU( 2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_z1_d  );
-  #endif
+void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_z1()
+{
+    #ifdef HALF_SIZE_BOUNDARIES
+  Unload_Transfer_Buffer_Half_GPU(2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                                  F.recv_boundaries_buffer_z1_d);
+    #else
+  Unload_Transfer_Buffer_GPU(2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d,
+                             F.recv_boundaries_buffer_z1_d);
+    #endif
 }
 
-
-
-bool Potential_SOR_3D::Get_Global_Converged( bool converged_local ){
-
-  int in = (int) converged_local;
+bool Potential_SOR_3D::Get_Global_Converged(bool converged_local)
+{
+  int in = (int)converged_local;
   int out;
   bool y;
 
-  MPI_Allreduce( &in, &out, 1, MPI_INT, MPI_MIN, world);
-  y = (bool) out;
+  MPI_Allreduce(&in, &out, 1, MPI_INT, MPI_MIN, world);
+  y = (bool)out;
   return y;
-
 }
 
-#endif
-
-
-
-
+  #endif
 
-#endif //GRAVITY
+#endif  // GRAVITY
diff --git a/src/gravity/potential_SOR_3D.h b/src/gravity/potential_SOR_3D.h
index d5064b35c..2b7c71abc 100644
--- a/src/gravity/potential_SOR_3D.h
+++ b/src/gravity/potential_SOR_3D.h
@@ -1,17 +1,18 @@
 #if defined(GRAVITY) && defined(SOR)
 
-#ifndef POTENTIAL_SOR_3D_H
-#define POTENTIAL_SOR_3D_H
+  #ifndef POTENTIAL_SOR_3D_H
+    #define POTENTIAL_SOR_3D_H
 
-#include "../global/global.h"
-#include <stdlib.h>
+    #include <stdlib.h>
+
+    #include "../global/global.h"
 
 // #define TIME_SOR
 // #define HALF_SIZE_BOUNDARIES
 
-class Potential_SOR_3D{
-  public:
-
+class Potential_SOR_3D
+{
+ public:
   Real Lbox_x;
   Real Lbox_y;
   Real Lbox_z;
@@ -38,7 +39,6 @@ class Potential_SOR_3D{
   grav_int_t n_cells_potential;
   grav_int_t n_cells_total;
 
-
   int n_ghost_transfer;
   int size_buffer_x;
   int size_buffer_y;
@@ -50,90 +50,97 @@ class Potential_SOR_3D{
 
   bool potential_initialized;
 
-  struct Fields
-  {
-
-  Real *output_h;
-
-  Real *input_d;
-  // Real *output_d;
-  Real *density_d;
-  Real *potential_d;
-
-  bool *converged_d;
-
-  bool *converged_h;
-
-  Real *boundaries_buffer_x0_d;
-  Real *boundaries_buffer_x1_d;
-  Real *boundaries_buffer_y0_d;
-  Real *boundaries_buffer_y1_d;
-  Real *boundaries_buffer_z0_d;
-  Real *boundaries_buffer_z1_d;
-
-
-  Real *boundary_isolated_x0_d;
-  Real *boundary_isolated_x1_d;
-  Real *boundary_isolated_y0_d;
-  Real *boundary_isolated_y1_d;
-  Real *boundary_isolated_z0_d;
-  Real *boundary_isolated_z1_d;
-
-  #ifdef MPI_CHOLLA
-  Real *recv_boundaries_buffer_x0_d;
-  Real *recv_boundaries_buffer_x1_d;
-  Real *recv_boundaries_buffer_y0_d;
-  Real *recv_boundaries_buffer_y1_d;
-  Real *recv_boundaries_buffer_z0_d;
-  Real *recv_boundaries_buffer_z1_d;
-  #endif
+  struct Fields {
+    Real *output_h;
+
+    Real *input_d;
+    // Real *output_d;
+    Real *density_d;
+    Real *potential_d;
+
+    bool *converged_d;
+
+    bool *converged_h;
+
+    Real *boundaries_buffer_x0_d;
+    Real *boundaries_buffer_x1_d;
+    Real *boundaries_buffer_y0_d;
+    Real *boundaries_buffer_y1_d;
+    Real *boundaries_buffer_z0_d;
+    Real *boundaries_buffer_z1_d;
+
+    Real *boundary_isolated_x0_d;
+    Real *boundary_isolated_x1_d;
+    Real *boundary_isolated_y0_d;
+    Real *boundary_isolated_y1_d;
+    Real *boundary_isolated_z0_d;
+    Real *boundary_isolated_z1_d;
+
+    #ifdef MPI_CHOLLA
+    Real *recv_boundaries_buffer_x0_d;
+    Real *recv_boundaries_buffer_x1_d;
+    Real *recv_boundaries_buffer_y0_d;
+    Real *recv_boundaries_buffer_y1_d;
+    Real *recv_boundaries_buffer_z0_d;
+    Real *recv_boundaries_buffer_z1_d;
+    #endif
 
   } F;
 
-  Potential_SOR_3D( void );
-
-  void Initialize( Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, Real dx, Real dy, Real dz );
-
-  void AllocateMemory_CPU( void );
-  void AllocateMemory_GPU( void );
-  void FreeMemory_GPU( void );
-  void Reset( void );
-  void Copy_Input( int n_cells, Real *input_d, Real *input_density_h, Real Grav_Constant, Real dens_avrg, Real current_a );
-
-  void Copy_Output( Real *output_potential );
-  void Copy_Potential_From_Host( Real *output_potential );
-
-
-  void Set_Boundaries(  );
-  // Real Get_Potential( Real *input_density,  Real *output_potential, Real Grav_Constant, Real dens_avrg, Real current_a );
-  // void Copy_Potential_From_Host( Real *potential_host );
-
-  void Allocate_Array_GPU_Real( Real **array_dev, grav_int_t size );
-  void Allocate_Array_GPU_bool( bool **array_dev, grav_int_t size );
-  void Free_Array_GPU_Real( Real * array_dev );
-  void Free_Array_GPU_bool( bool * array_dev );
-
-
-
-  void Initialize_Potential( int nx, int ny, int nz, int n_ghost_potential, Real *potential_d, Real *density_d );
-  void Copy_Input_And_Initialize( Real *input_density, const Real *input_potential, Real Grav_Constant, Real dens_avrg, Real current_a );
-
-  void Poisson_iteration( int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz, Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h, bool *converged_d );
-  void Poisson_iteration_Patial_1( int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz, Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h, bool *converged_d );
-  void Poisson_iteration_Patial_2( int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz, Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h, bool *converged_d );
-  void Poisson_Partial_Iteration( int n_step, Real omega, Real epsilon );
-
-
-  void Load_Transfer_Buffer_GPU( int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d  );
-  void Load_Transfer_Buffer_Half_GPU( int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d  );
+  Potential_SOR_3D(void);
+
+  void Initialize(Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, int nx_real,
+                  int ny_real, int nz_real, Real dx, Real dy, Real dz);
+
+  void AllocateMemory_CPU(void);
+  void AllocateMemory_GPU(void);
+  void FreeMemory_GPU(void);
+  void Reset(void);
+  void Copy_Input(int n_cells, Real *input_d, Real *input_density_h, Real Grav_Constant, Real dens_avrg,
+                  Real current_a);
+
+  void Copy_Output(Real *output_potential);
+  void Copy_Potential_From_Host(Real *output_potential);
+
+  void Set_Boundaries();
+  // Real Get_Potential( Real *input_density,  Real *output_potential, Real
+  // Grav_Constant, Real dens_avrg, Real current_a ); void
+  // Copy_Potential_From_Host( Real *potential_host );
+
+  void Allocate_Array_GPU_Real(Real **array_dev, grav_int_t size);
+  void Allocate_Array_GPU_bool(bool **array_dev, grav_int_t size);
+  void Free_Array_GPU_Real(Real *array_dev);
+  void Free_Array_GPU_bool(bool *array_dev);
+
+  void Initialize_Potential(int nx, int ny, int nz, int n_ghost_potential, Real *potential_d, Real *density_d);
+  void Copy_Input_And_Initialize(Real *input_density, const Real *input_potential, Real Grav_Constant, Real dens_avrg,
+                                 Real current_a);
+
+  void Poisson_iteration(int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz,
+                         Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h,
+                         bool *converged_d);
+  void Poisson_iteration_Patial_1(int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz,
+                                  Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h,
+                                  bool *converged_d);
+  void Poisson_iteration_Patial_2(int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz,
+                                  Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h,
+                                  bool *converged_d);
+  void Poisson_Partial_Iteration(int n_step, Real omega, Real epsilon);
+
+  void Load_Transfer_Buffer_GPU(int direction, int side, int nx, int ny, int nz, int n_ghost_transfer,
+                                int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d);
+  void Load_Transfer_Buffer_Half_GPU(int direction, int side, int nx, int ny, int nz, int n_ghost_transfer,
+                                     int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d);
   void Load_Transfer_Buffer_GPU_x0();
   void Load_Transfer_Buffer_GPU_x1();
   void Load_Transfer_Buffer_GPU_y0();
   void Load_Transfer_Buffer_GPU_y1();
   void Load_Transfer_Buffer_GPU_z0();
   void Load_Transfer_Buffer_GPU_z1();
-  void Unload_Transfer_Buffer_GPU( int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d  );
-  void Unload_Transfer_Buffer_Half_GPU( int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d  );
+  void Unload_Transfer_Buffer_GPU(int direction, int side, int nx, int ny, int nz, int n_ghost_transfer,
+                                  int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d);
+  void Unload_Transfer_Buffer_Half_GPU(int direction, int side, int nx, int ny, int nz, int n_ghost_transfer,
+                                       int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d);
   void Unload_Transfer_Buffer_GPU_x0();
   void Unload_Transfer_Buffer_GPU_x1();
   void Unload_Transfer_Buffer_GPU_y0();
@@ -141,27 +148,23 @@ class Potential_SOR_3D{
   void Unload_Transfer_Buffer_GPU_z0();
   void Unload_Transfer_Buffer_GPU_z1();
 
-  void Copy_Poisson_Boundary_Periodic( int direction, int side );
+  void Copy_Poisson_Boundary_Periodic(int direction, int side);
 
-  void Copy_Poisson_Boundary_Open( int direction, int side );
+  void Copy_Poisson_Boundary_Open(int direction, int side);
 
   // void Load_Transfer_Buffer_GPU_All();
   // void Unload_Transfer_Buffer_GPU_All();
 
-  void Copy_Transfer_Buffer_To_Host( int size_buffer, Real *transfer_bufer_h, Real *transfer_buffer_d );
-  void Copy_Transfer_Buffer_To_Device( int size_buffer, Real *transfer_bufer_h, Real *transfer_buffer_d );
-
-  void Set_Isolated_Boundary_Conditions( int *boundary_flags, struct parameters *P );
-  void Set_Isolated_Boundary_GPU( int direction, int side,   Real *boundary_d  );
+  void Copy_Transfer_Buffer_To_Host(int size_buffer, Real *transfer_bufer_h, Real *transfer_buffer_d);
+  void Copy_Transfer_Buffer_To_Device(int size_buffer, Real *transfer_bufer_h, Real *transfer_buffer_d);
 
+  void Set_Isolated_Boundary_Conditions(int *boundary_flags, struct Parameters *P);
+  void Set_Isolated_Boundary_GPU(int direction, int side, Real *boundary_d);
 
-  #ifdef MPI_CHOLLA
-  bool Get_Global_Converged( bool converged_local );
-  #endif
+    #ifdef MPI_CHOLLA
+  bool Get_Global_Converged(bool converged_local);
+    #endif
 };
 
-
-
-
-#endif //POTENTIAL_SOR_H
-#endif //GRAVITY
+  #endif  // POTENTIAL_SOR_H
+#endif    // GRAVITY
diff --git a/src/gravity/potential_SOR_3D_gpu.cu b/src/gravity/potential_SOR_3D_gpu.cu
index 47d680077..d2066edb8 100644
--- a/src/gravity/potential_SOR_3D_gpu.cu
+++ b/src/gravity/potential_SOR_3D_gpu.cu
@@ -1,74 +1,72 @@
 #if defined(CUDA) && defined(GRAVITY) && defined(SOR)
 
-#include "../gravity/potential_SOR_3D.h"
-#include "../global/global_cuda.h"
-#include "../io/io.h"
+  #include "../global/global_cuda.h"
+  #include "../gravity/potential_SOR_3D.h"
+  #include "../io/io.h"
 
+  #define TPB_SOR 1024
 
-#define TPB_SOR 1024
-
-
-void Potential_SOR_3D::Allocate_Array_GPU_Real( Real **array_dev, grav_int_t size ){
-  cudaMalloc( (void**)array_dev, size*sizeof(Real));
-  CudaCheckError();
+void Potential_SOR_3D::Allocate_Array_GPU_Real(Real **array_dev, grav_int_t size)
+{
+  GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(Real)));
 }
 
-void Potential_SOR_3D::Allocate_Array_GPU_bool( bool **array_dev, grav_int_t size ){
-  cudaMalloc( (void**)array_dev, size*sizeof(bool));
-  CudaCheckError();
+void Potential_SOR_3D::Allocate_Array_GPU_bool(bool **array_dev, grav_int_t size)
+{
+  GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(bool)));
 }
 
-void Potential_SOR_3D::Free_Array_GPU_Real( Real *array_dev ){
-  cudaFree( array_dev );
-  CudaCheckError();
-}
-
-void Potential_SOR_3D::Free_Array_GPU_bool( bool *array_dev ){
-  cudaFree( array_dev );
-  CudaCheckError();
-}
+void Potential_SOR_3D::Free_Array_GPU_Real(Real *array_dev) { GPU_Error_Check(cudaFree(array_dev)); }
 
-__global__ void Copy_Input_Kernel( int n_cells, Real *input_d, Real *density_d, Real Grav_Constant, Real dens_avrg, Real current_a ){
+void Potential_SOR_3D::Free_Array_GPU_bool(bool *array_dev) { GPU_Error_Check(cudaFree(array_dev)); }
 
+__global__ void Copy_Input_Kernel(int n_cells, Real *input_d, Real *density_d, Real Grav_Constant, Real dens_avrg,
+                                  Real current_a)
+{
   int tid = threadIdx.x + blockIdx.x * blockDim.x;
-  if ( tid >= n_cells ) return;
+  if (tid >= n_cells) return;
 
   #ifdef COSMOLOGY
-  density_d[tid] = 4 * M_PI * Grav_Constant * ( input_d[tid] - dens_avrg ) / current_a;
+  density_d[tid] = 4 * M_PI * Grav_Constant * (input_d[tid] - dens_avrg) / current_a;
   #else
-  density_d[tid] = 4 * M_PI * Grav_Constant * ( input_d[tid] - dens_avrg );
+  density_d[tid] = 4 * M_PI * Grav_Constant * (input_d[tid] - dens_avrg);
   #endif
   // if (tid == 0) printf("dens: %f\n", density_d[tid]);
 }
 
-
-void Potential_SOR_3D::Copy_Input( int n_cells, Real *input_d, Real *input_density_h, Real Grav_Constant, Real dens_avrg, Real current_a ){
-  cudaMemcpy( input_d, input_density_h, n_cells*sizeof(Real), cudaMemcpyHostToDevice );
+void Potential_SOR_3D::Copy_Input(int n_cells, Real *input_d, Real *input_density_h, Real Grav_Constant, Real dens_avrg,
+                                  Real current_a)
+{
+  cudaMemcpy(input_d, input_density_h, n_cells * sizeof(Real), cudaMemcpyHostToDevice);
 
   // set values for GPU kernels
-  int ngrid =  (n_cells_local + TPB_SOR - 1) / TPB_SOR;
+  int ngrid = (n_cells_local + TPB_SOR - 1) / TPB_SOR;
   // number of blocks per 1D grid
   dim3 dim1dGrid(ngrid, 1, 1);
   //  number of threads per 1D block
   dim3 dim1dBlock(TPB_SOR, 1, 1);
 
-  // Copy_Input_Kernel<<<dim1dGrid,dim1dBlock>>>( n_cells_local, F.input_d, F.density_d,  Grav_Constant, dens_avrg, current_a  );
-  hipLaunchKernelGGL( Copy_Input_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_cells_local, F.input_d, F.density_d,  Grav_Constant, dens_avrg, current_a  );
-
+  // Copy_Input_Kernel<<<dim1dGrid,dim1dBlock>>>( n_cells_local, F.input_d,
+  // F.density_d,  Grav_Constant, dens_avrg, current_a  );
+  hipLaunchKernelGGL(Copy_Input_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_cells_local, F.input_d, F.density_d,
+                     Grav_Constant, dens_avrg, current_a);
 }
 
-void Grav3D::Copy_Isolated_Boundary_To_GPU_buffer( Real *isolated_boundary_h, Real *isolated_boundary_d, int boundary_size ){
- cudaMemcpy( isolated_boundary_d, isolated_boundary_h, boundary_size*sizeof(Real), cudaMemcpyHostToDevice );
+void Grav3D::Copy_Isolated_Boundary_To_GPU_buffer(Real *isolated_boundary_h, Real *isolated_boundary_d,
+                                                  int boundary_size)
+{
+  cudaMemcpy(isolated_boundary_d, isolated_boundary_h, boundary_size * sizeof(Real), cudaMemcpyHostToDevice);
 }
 
-__global__ void Initialize_Potential_Kernel( Real init_val, Real *potential_d, Real *density_d, int nx, int ny, int nz, int n_ghost ){
-
+__global__ void Initialize_Potential_Kernel(Real init_val, Real *potential_d, Real *density_d, int nx, int ny, int nz,
+                                            int n_ghost)
+{
   int tid_x, tid_y, tid_z, tid_pot;
   tid_x = blockIdx.x * blockDim.x + threadIdx.x;
   tid_y = blockIdx.y * blockDim.y + threadIdx.y;
   tid_z = blockIdx.z * blockDim.z + threadIdx.z;
 
-  if (tid_x >= nx || tid_y >= ny || tid_z >= nz ) return;
+  if (tid_x >= nx || tid_y >= ny || tid_z >= nz) return;
 
   // tid = tid_x + tid_y*nx + tid_z*nx*ny;
 
@@ -77,88 +75,89 @@ __global__ void Initialize_Potential_Kernel( Real init_val, Real *potential_d, R
   tid_z += n_ghost;
 
   int nx_pot, ny_pot;
-  nx_pot = nx + 2*n_ghost;
-  ny_pot = ny + 2*n_ghost;
+  nx_pot = nx + 2 * n_ghost;
+  ny_pot = ny + 2 * n_ghost;
 
-
-  tid_pot = tid_x + tid_y*nx_pot + tid_z*nx_pot*ny_pot;
+  tid_pot              = tid_x + tid_y * nx_pot + tid_z * nx_pot * ny_pot;
   potential_d[tid_pot] = init_val;
 
-  //if ( potential_d[tid_pot] !=1 ) printf("Error phi value: %f\n", potential_d[tid_pot] );
-
+  // if ( potential_d[tid_pot] !=1 ) printf("Error phi value: %f\n",
+  // potential_d[tid_pot] );
 
   // Real dens = density_d[tid];
   // potential_d[tid_pot] = -dens;
-
 }
 
-
-
-void Potential_SOR_3D::Initialize_Potential( int nx, int ny, int nz, int n_ghost_potential, Real *potential_d, Real *density_d ){
+void Potential_SOR_3D::Initialize_Potential(int nx, int ny, int nz, int n_ghost_potential, Real *potential_d,
+                                            Real *density_d)
+{
   // set values for GPU kernels
-  int tpb_x = 16;
-  int tpb_y = 8;
-  int tpb_z = 8;
-  int ngrid_x =  (nx_local + tpb_x - 1) / tpb_x;
-  int ngrid_y =  (ny_local + tpb_y - 1) / tpb_y;
-  int ngrid_z =  (nz_local + tpb_z - 1) / tpb_z;
+  int tpb_x   = 16;
+  int tpb_y   = 8;
+  int tpb_z   = 8;
+  int ngrid_x = (nx_local + tpb_x - 1) / tpb_x;
+  int ngrid_y = (ny_local + tpb_y - 1) / tpb_y;
+  int ngrid_z = (nz_local + tpb_z - 1) / tpb_z;
   // number of blocks per 1D grid
   dim3 dim3dGrid(ngrid_x, ngrid_y, ngrid_z);
   //  number of threads per 1D block
   dim3 dim3dBlock(tpb_x, tpb_y, tpb_z);
 
-  // Initialize_Potential_Kernel<<<dim3dGrid,dim3dBlock>>>( 1, potential_d, density_d, nx, ny, nz, n_ghost_potential );
-  hipLaunchKernelGGL( Initialize_Potential_Kernel, dim3dGrid, dim3dBlock, 0, 0, 1, potential_d, density_d, nx, ny, nz, n_ghost_potential );
-
+  // Initialize_Potential_Kernel<<<dim3dGrid,dim3dBlock>>>( 1, potential_d,
+  // density_d, nx, ny, nz, n_ghost_potential );
+  hipLaunchKernelGGL(Initialize_Potential_Kernel, dim3dGrid, dim3dBlock, 0, 0, 1, potential_d, density_d, nx, ny, nz,
+                     n_ghost_potential);
 }
 
-
-__global__ void Iteration_Step_SOR( int n_cells, Real *density_d, Real *potential_d, int nx, int ny, int nz, int n_ghost, Real dx, Real dy, Real dz, Real omega, int parity, Real epsilon,  bool *converged_d ){
-
+__global__ void Iteration_Step_SOR(int n_cells, Real *density_d, Real *potential_d, int nx, int ny, int nz, int n_ghost,
+                                   Real dx, Real dy, Real dz, Real omega, int parity, Real epsilon, bool *converged_d)
+{
   int tid_x, tid_y, tid_z, tid, tid_pot;
-  tid_x = 2*( blockIdx.x * blockDim.x + threadIdx.x );
+  tid_x = 2 * (blockIdx.x * blockDim.x + threadIdx.x);
   tid_y = blockIdx.y * blockDim.y + threadIdx.y;
   tid_z = blockIdx.z * blockDim.z + threadIdx.z;
 
   // Make a checkboard 3D grid
-  if ( tid_y%2 == 0 ){
-    if ( tid_z%2 == parity ) tid_x +=1;
-  }
-  else if ( (tid_z+1)%2 == parity ) tid_x +=1;
+  if (tid_y % 2 == 0) {
+    if (tid_z % 2 == parity) tid_x += 1;
+  } else if ((tid_z + 1) % 2 == parity)
+    tid_x += 1;
 
-  if (tid_x >= nx || tid_y >= ny || tid_z >= nz ) return;
+  if (tid_x >= nx || tid_y >= ny || tid_z >= nz) return;
 
   int nx_pot, ny_pot;
-  nx_pot = nx + 2*n_ghost;
-  ny_pot = ny + 2*n_ghost;
+  nx_pot = nx + 2 * n_ghost;
+  ny_pot = ny + 2 * n_ghost;
   // nz_pot = nz + 2*n_ghost;
 
-  tid = tid_x + tid_y*nx + tid_z*nx*ny;
+  tid = tid_x + tid_y * nx + tid_z * nx * ny;
 
   tid_x += n_ghost;
   tid_y += n_ghost;
   tid_z += n_ghost;
-  tid_pot = tid_x + tid_y*nx_pot + tid_z*nx_pot*ny_pot;
+  tid_pot = tid_x + tid_y * nx_pot + tid_z * nx_pot * ny_pot;
 
   // //Set neighbors ids
   int indx_l, indx_r, indx_d, indx_u, indx_b, indx_t;
 
-  indx_l = tid_x-1;  //Left
-  indx_r = tid_x+1;  //Right
-  indx_d = tid_y-1;  //Down
-  indx_u = tid_y+1;  //Up
-  indx_b = tid_z-1;  //Bottom
-  indx_t = tid_z+1;  //Top
+  indx_l = tid_x - 1;  // Left
+  indx_r = tid_x + 1;  // Right
+  indx_d = tid_y - 1;  // Down
+  indx_u = tid_y + 1;  // Up
+  indx_b = tid_z - 1;  // Bottom
+  indx_t = tid_z + 1;  // Top
 
-  //Boundary Conditions are loaded to the potential array, the natural indices work!
+  // Boundary Conditions are loaded to the potential array, the natural indices
+  // work!
 
   // //Periodic Boundary conditions
   // indx_l = tid_x == n_ghost          ?    nx_pot-n_ghost-1 : tid_x-1;  //Left
-  // indx_r = tid_x == nx_pot-n_ghost-1 ?             n_ghost : tid_x+1;  //Right
+  // indx_r = tid_x == nx_pot-n_ghost-1 ?             n_ghost : tid_x+1; //Right
   // indx_d = tid_y == n_ghost          ?    ny_pot-n_ghost-1 : tid_y-1;  //Down
   // indx_u = tid_y == ny_pot-n_ghost-1 ?             n_ghost : tid_y+1;  //Up
-  // indx_b = tid_z == n_ghost          ?    nz_pot-n_ghost-1 : tid_z-1;  //Bottom
-  // indx_t = tid_z == nz_pot-n_ghost-1 ?             n_ghost : tid_z+1;  //Top
+  // indx_b = tid_z == n_ghost          ?    nz_pot-n_ghost-1 : tid_z-1;
+  // //Bottom indx_t = tid_z == nz_pot-n_ghost-1 ?             n_ghost :
+  // tid_z+1;  //Top
   //
   // //Zero Gradient Boundary conditions
   // indx_l = tid_x == n_ghost          ?    tid_x+1 : tid_x-1;  //Left
@@ -168,163 +167,174 @@ __global__ void Iteration_Step_SOR( int n_cells, Real *density_d, Real *potentia
   // indx_b = tid_z == n_ghost          ?    tid_z+1 : tid_z-1;  //Bottom
   // indx_t = tid_z == nz_pot-n_ghost-1 ?    tid_z-1 : tid_z+1;  //Top
 
-
-
   Real rho, phi_c, phi_l, phi_r, phi_d, phi_u, phi_b, phi_t, phi_new;
-  rho = density_d[tid];
+  rho   = density_d[tid];
   phi_c = potential_d[tid_pot];
-  phi_l = potential_d[ indx_l + tid_y*nx_pot + tid_z*nx_pot*ny_pot ];
-  phi_r = potential_d[ indx_r + tid_y*nx_pot + tid_z*nx_pot*ny_pot ];
-  phi_d = potential_d[ tid_x + indx_d*nx_pot + tid_z*nx_pot*ny_pot ];
-  phi_u = potential_d[ tid_x + indx_u*nx_pot + tid_z*nx_pot*ny_pot ];
-  phi_b = potential_d[ tid_x + tid_y*nx_pot + indx_b*nx_pot*ny_pot ];
-  phi_t = potential_d[ tid_x + tid_y*nx_pot + indx_t*nx_pot*ny_pot ];
-
-  phi_new = (1-omega)*phi_c + omega/6*( phi_l + phi_r + phi_d + phi_u + phi_b + phi_t - dx*dx*rho );
+  phi_l = potential_d[indx_l + tid_y * nx_pot + tid_z * nx_pot * ny_pot];
+  phi_r = potential_d[indx_r + tid_y * nx_pot + tid_z * nx_pot * ny_pot];
+  phi_d = potential_d[tid_x + indx_d * nx_pot + tid_z * nx_pot * ny_pot];
+  phi_u = potential_d[tid_x + indx_u * nx_pot + tid_z * nx_pot * ny_pot];
+  phi_b = potential_d[tid_x + tid_y * nx_pot + indx_b * nx_pot * ny_pot];
+  phi_t = potential_d[tid_x + tid_y * nx_pot + indx_t * nx_pot * ny_pot];
+
+  phi_new = (1 - omega) * phi_c + omega / 6 * (phi_l + phi_r + phi_d + phi_u + phi_b + phi_t - dx * dx * rho);
   potential_d[tid_pot] = phi_new;
   // potential_d[tid_pot] = parity + 1;
 
-  //Check the residual for the convergence criteria
-  if ( ( fabs( ( phi_new - phi_c ) / phi_c ) > epsilon ) ) converged_d[0] = 0;
-  // if ( ( fabs( ( phi_new - phi_c ) / phi_c ) > epsilon ) ) printf("%f\n", fabs( ( phi_new - phi_c ) / phi_c)  );
-  // if ( ( fabs( ( phi_new - phi_c ) ) > epsilon ) ) converged_d[0] = 0;
-
-
-
-
+  // Check the residual for the convergence criteria
+  if ((fabs((phi_new - phi_c) / phi_c) > epsilon)) converged_d[0] = 0;
+  // if ( ( fabs( ( phi_new - phi_c ) / phi_c ) > epsilon ) ) printf("%f\n",
+  // fabs( ( phi_new - phi_c ) / phi_c)  ); if ( ( fabs( ( phi_new - phi_c ) ) >
+  // epsilon ) ) converged_d[0] = 0;
 }
 
-void Potential_SOR_3D::Poisson_iteration( int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz, Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h, bool *converged_d ){
-
+void Potential_SOR_3D::Poisson_iteration(int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy,
+                                         Real dz, Real omega, Real epsilon, Real *density_d, Real *potential_d,
+                                         bool *converged_h, bool *converged_d)
+{
   // set values for GPU kernels
-  int tpb_x = 16;
-  int tpb_y = 8;
-  int tpb_z = 8;
-  int ngrid_x =  (nx_local + tpb_x - 1) / tpb_x;
-  int ngrid_y =  (ny_local + tpb_y - 1) / tpb_y;
-  int ngrid_z =  (nz_local + tpb_z - 1) / tpb_z;
-  int ngrid_x_half = ( nx_local/2 + tpb_x - 1) / tpb_x;
+  int tpb_x        = 16;
+  int tpb_y        = 8;
+  int tpb_z        = 8;
+  int ngrid_x      = (nx_local + tpb_x - 1) / tpb_x;
+  int ngrid_y      = (ny_local + tpb_y - 1) / tpb_y;
+  int ngrid_z      = (nz_local + tpb_z - 1) / tpb_z;
+  int ngrid_x_half = (nx_local / 2 + tpb_x - 1) / tpb_x;
   // number of blocks per 1D grid
   dim3 dim3dGrid_half(ngrid_x_half, ngrid_y, ngrid_z);
   dim3 dim3dGrid(ngrid_x, ngrid_y, ngrid_z);
   //  number of threads per 1D block
   dim3 dim3dBlock(tpb_x, tpb_y, tpb_z);
 
-  cudaMemset( converged_d, 1, sizeof(bool) );
-
-  // Iteration_Step_SOR<<<dim3dGrid_half,dim3dBlock>>>( n_cells, density_d, potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 0, epsilon, converged_d );
-  hipLaunchKernelGGL( Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, n_cells, density_d, potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 0, epsilon, converged_d );
+  cudaMemset(converged_d, 1, sizeof(bool));
 
-  // Iteration_Step_SOR<<<dim3dGrid_half,dim3dBlock>>>( n_cells, density_d, potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 1, epsilon, converged_d );
-  hipLaunchKernelGGL( Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, n_cells, density_d, potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 1, epsilon, converged_d );
+  // Iteration_Step_SOR<<<dim3dGrid_half,dim3dBlock>>>( n_cells, density_d,
+  // potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 0, epsilon,
+  // converged_d );
+  hipLaunchKernelGGL(Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, n_cells, density_d, potential_d, nx, ny, nz,
+                     n_ghost_potential, dx, dy, dz, omega, 0, epsilon, converged_d);
 
-  cudaMemcpy( converged_h, converged_d, sizeof(bool), cudaMemcpyDeviceToHost );
+  // Iteration_Step_SOR<<<dim3dGrid_half,dim3dBlock>>>( n_cells, density_d,
+  // potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 1, epsilon,
+  // converged_d );
+  hipLaunchKernelGGL(Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, n_cells, density_d, potential_d, nx, ny, nz,
+                     n_ghost_potential, dx, dy, dz, omega, 1, epsilon, converged_d);
 
+  cudaMemcpy(converged_h, converged_d, sizeof(bool), cudaMemcpyDeviceToHost);
 }
 
-
-void Potential_SOR_3D::Poisson_iteration_Patial_1( int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz, Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h, bool *converged_d ){
-
+void Potential_SOR_3D::Poisson_iteration_Patial_1(int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx,
+                                                  Real dy, Real dz, Real omega, Real epsilon, Real *density_d,
+                                                  Real *potential_d, bool *converged_h, bool *converged_d)
+{
   // set values for GPU kernels
-  int tpb_x = 16;
-  int tpb_y = 8;
-  int tpb_z = 8;
-  int ngrid_x =  (nx_local + tpb_x - 1) / tpb_x;
-  int ngrid_y =  (ny_local + tpb_y - 1) / tpb_y;
-  int ngrid_z =  (nz_local + tpb_z - 1) / tpb_z;
-  int ngrid_x_half = ( nx_local/2 + tpb_x - 1) / tpb_x;
+  int tpb_x        = 16;
+  int tpb_y        = 8;
+  int tpb_z        = 8;
+  int ngrid_x      = (nx_local + tpb_x - 1) / tpb_x;
+  int ngrid_y      = (ny_local + tpb_y - 1) / tpb_y;
+  int ngrid_z      = (nz_local + tpb_z - 1) / tpb_z;
+  int ngrid_x_half = (nx_local / 2 + tpb_x - 1) / tpb_x;
   // number of blocks per 1D grid
   dim3 dim3dGrid_half(ngrid_x_half, ngrid_y, ngrid_z);
   dim3 dim3dGrid(ngrid_x, ngrid_y, ngrid_z);
   //  number of threads per 1D block
   dim3 dim3dBlock(tpb_x, tpb_y, tpb_z);
 
-  cudaMemset( converged_d, 1, sizeof(bool) );
-
-  // Iteration_Step_SOR<<<dim3dGrid_half,dim3dBlock>>>( n_cells, density_d, potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 0, epsilon, converged_d );
-  hipLaunchKernelGGL( Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, n_cells, density_d, potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 0, epsilon, converged_d );
+  cudaMemset(converged_d, 1, sizeof(bool));
 
+  // Iteration_Step_SOR<<<dim3dGrid_half,dim3dBlock>>>( n_cells, density_d,
+  // potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 0, epsilon,
+  // converged_d );
+  hipLaunchKernelGGL(Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, n_cells, density_d, potential_d, nx, ny, nz,
+                     n_ghost_potential, dx, dy, dz, omega, 0, epsilon, converged_d);
 }
 
-
-void Potential_SOR_3D::Poisson_iteration_Patial_2( int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz, Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h, bool *converged_d ){
-
+void Potential_SOR_3D::Poisson_iteration_Patial_2(int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx,
+                                                  Real dy, Real dz, Real omega, Real epsilon, Real *density_d,
+                                                  Real *potential_d, bool *converged_h, bool *converged_d)
+{
   // set values for GPU kernels
-  int tpb_x = 16;
-  int tpb_y = 8;
-  int tpb_z = 8;
-  int ngrid_x =  (nx_local + tpb_x - 1) / tpb_x;
-  int ngrid_y =  (ny_local + tpb_y - 1) / tpb_y;
-  int ngrid_z =  (nz_local + tpb_z - 1) / tpb_z;
-  int ngrid_x_half = ( nx_local/2 + tpb_x - 1) / tpb_x;
+  int tpb_x        = 16;
+  int tpb_y        = 8;
+  int tpb_z        = 8;
+  int ngrid_x      = (nx_local + tpb_x - 1) / tpb_x;
+  int ngrid_y      = (ny_local + tpb_y - 1) / tpb_y;
+  int ngrid_z      = (nz_local + tpb_z - 1) / tpb_z;
+  int ngrid_x_half = (nx_local / 2 + tpb_x - 1) / tpb_x;
   // number of blocks per 1D grid
   dim3 dim3dGrid_half(ngrid_x_half, ngrid_y, ngrid_z);
   dim3 dim3dGrid(ngrid_x, ngrid_y, ngrid_z);
   //  number of threads per 1D block
   dim3 dim3dBlock(tpb_x, tpb_y, tpb_z);
 
-  // Iteration_Step_SOR<<<dim3dGrid_half,dim3dBlock>>>( n_cells, density_d, potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 1, epsilon, converged_d );
-  hipLaunchKernelGGL( Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, n_cells, density_d, potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 1, epsilon, converged_d );
-
-  cudaMemcpy( converged_h, converged_d, sizeof(bool), cudaMemcpyDeviceToHost );
+  // Iteration_Step_SOR<<<dim3dGrid_half,dim3dBlock>>>( n_cells, density_d,
+  // potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 1, epsilon,
+  // converged_d );
+  hipLaunchKernelGGL(Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, n_cells, density_d, potential_d, nx, ny, nz,
+                     n_ghost_potential, dx, dy, dz, omega, 1, epsilon, converged_d);
 
+  cudaMemcpy(converged_h, converged_d, sizeof(bool), cudaMemcpyDeviceToHost);
 }
 
-
-__global__ void Set_Isolated_Boundary_GPU_kernel( int direction, int side, int size_buffer, int n_i, int n_j, int n_ghost, int nx_pot, int ny_pot, int nz_pot,  Real *potential_d, Real *boundary_d   ){
-
+__global__ void Set_Isolated_Boundary_GPU_kernel(int direction, int side, int size_buffer, int n_i, int n_j,
+                                                 int n_ghost, int nx_pot, int ny_pot, int nz_pot, Real *potential_d,
+                                                 Real *boundary_d)
+{
   // get a global thread ID
   int nx_local, ny_local, nz_local;
-  nx_local = nx_pot - 2*n_ghost;
-  ny_local = ny_pot - 2*n_ghost;
-  nz_local = nz_pot - 2*n_ghost;
+  nx_local = nx_pot - 2 * n_ghost;
+  ny_local = ny_pot - 2 * n_ghost;
+  nz_local = nz_pot - 2 * n_ghost;
   int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot;
-  tid = threadIdx.x + blockIdx.x * blockDim.x;
-  tid_k = tid / (n_i*n_j);
-  tid_j = (tid - tid_k*n_i*n_j) / n_i;
-  tid_i = tid - tid_k*n_i*n_j - tid_j*n_i;
+  tid   = threadIdx.x + blockIdx.x * blockDim.x;
+  tid_k = tid / (n_i * n_j);
+  tid_j = (tid - tid_k * n_i * n_j) / n_i;
+  tid_i = tid - tid_k * n_i * n_j - tid_j * n_i;
 
-  if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost ) return;
+  if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) return;
 
-  tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j;
+  tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j;
 
-  if ( direction == 0 ){
-    if ( side == 0 ) tid_pot = (tid_k)                  + (tid_i+n_ghost)*nx_pot + (tid_j+n_ghost)*nx_pot*ny_pot;
-    if ( side == 1 ) tid_pot = (tid_k+nx_local+n_ghost) + (tid_i+n_ghost)*nx_pot + (tid_j+n_ghost)*nx_pot*ny_pot;
+  if (direction == 0) {
+    if (side == 0) tid_pot = (tid_k) + (tid_i + n_ghost) * nx_pot + (tid_j + n_ghost) * nx_pot * ny_pot;
+    if (side == 1)
+      tid_pot = (tid_k + nx_local + n_ghost) + (tid_i + n_ghost) * nx_pot + (tid_j + n_ghost) * nx_pot * ny_pot;
   }
-  if ( direction == 1 ){
-    if ( side == 0 ) tid_pot = (tid_i+n_ghost) + (tid_k)*nx_pot                  + (tid_j+n_ghost)*nx_pot*ny_pot;
-    if ( side == 1 ) tid_pot = (tid_i+n_ghost) + (tid_k+ny_local+n_ghost)*nx_pot + (tid_j+n_ghost)*nx_pot*ny_pot;
+  if (direction == 1) {
+    if (side == 0) tid_pot = (tid_i + n_ghost) + (tid_k)*nx_pot + (tid_j + n_ghost) * nx_pot * ny_pot;
+    if (side == 1)
+      tid_pot = (tid_i + n_ghost) + (tid_k + ny_local + n_ghost) * nx_pot + (tid_j + n_ghost) * nx_pot * ny_pot;
   }
-  if ( direction == 2 ){
-    if ( side == 0 ) tid_pot = (tid_i+n_ghost) + (tid_j+n_ghost)*nx_pot + (tid_k)*nx_pot*ny_pot;
-    if ( side == 1 ) tid_pot = (tid_i+n_ghost) + (tid_j+n_ghost)*nx_pot + (tid_k+nz_local+n_ghost)*nx_pot*ny_pot;
+  if (direction == 2) {
+    if (side == 0) tid_pot = (tid_i + n_ghost) + (tid_j + n_ghost) * nx_pot + (tid_k)*nx_pot * ny_pot;
+    if (side == 1)
+      tid_pot = (tid_i + n_ghost) + (tid_j + n_ghost) * nx_pot + (tid_k + nz_local + n_ghost) * nx_pot * ny_pot;
   }
 
   potential_d[tid_pot] = boundary_d[tid_buffer];
-
 }
 
-void Potential_SOR_3D::Set_Isolated_Boundary_GPU( int direction, int side,   Real *boundary_d  ){
-
+void Potential_SOR_3D::Set_Isolated_Boundary_GPU(int direction, int side, Real *boundary_d)
+{
   // #ifdef MPI_CHOLLA
-  // printf("Pid: %d Setting Isolated Boundary: %d %d \n",procID, direction, side );
-  // #endif
+  // printf("Pid: %d Setting Isolated Boundary: %d %d \n",procID, direction,
+  // side ); #endif
   //
   int nx_pot, ny_pot, nz_pot, size_buffer, n_i, n_j, ngrid;
-  nx_pot = nx_local + 2*n_ghost;
-  ny_pot = ny_local + 2*n_ghost;
-  nz_pot = nz_local + 2*n_ghost;
+  nx_pot = nx_local + 2 * n_ghost;
+  ny_pot = ny_local + 2 * n_ghost;
+  nz_pot = nz_local + 2 * n_ghost;
 
-  if ( direction == 0 ){
+  if (direction == 0) {
     n_i = ny_local;
     n_j = nz_local;
   }
-  if ( direction == 1 ){
+  if (direction == 1) {
     n_i = nx_local;
     n_j = nz_local;
   }
-  if ( direction == 2 ){
+  if (direction == 2) {
     n_i = nx_local;
     n_j = ny_local;
   }
@@ -332,74 +342,76 @@ void Potential_SOR_3D::Set_Isolated_Boundary_GPU( int direction, int side,   Rea
   size_buffer = n_ghost * n_i * n_j;
 
   // set values for GPU kernels
-  ngrid = ( size_buffer - 1 ) / TPB_SOR + 1;
+  ngrid = (size_buffer - 1) / TPB_SOR + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(ngrid, 1, 1);
   //  number of threads per 1D block
   dim3 dim1dBlock(TPB_SOR, 1, 1);
 
-  // Set_Isolated_Boundary_GPU_kernel<<<dim1dGrid,dim1dBlock>>>( direction, side, size_buffer, n_i, n_j, n_ghost, nx_pot, ny_pot, nz_pot,  F.potential_d, boundary_d  );
-  hipLaunchKernelGGL( Set_Isolated_Boundary_GPU_kernel, dim1dGrid, dim1dBlock,  0, 0, direction, side, size_buffer, n_i, n_j, n_ghost, nx_pot, ny_pot, nz_pot,  F.potential_d, boundary_d  );
-
+  // Set_Isolated_Boundary_GPU_kernel<<<dim1dGrid,dim1dBlock>>>( direction,
+  // side, size_buffer, n_i, n_j, n_ghost, nx_pot, ny_pot, nz_pot,
+  // F.potential_d, boundary_d  );
+  hipLaunchKernelGGL(Set_Isolated_Boundary_GPU_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i,
+                     n_j, n_ghost, nx_pot, ny_pot, nz_pot, F.potential_d, boundary_d);
 }
 
-
-
-void Potential_SOR_3D::Copy_Output( Real *output_potential ){
-  cudaMemcpy( output_potential, F.potential_d, n_cells_potential*sizeof(Real), cudaMemcpyDeviceToHost );
+void Potential_SOR_3D::Copy_Output(Real *output_potential)
+{
+  cudaMemcpy(output_potential, F.potential_d, n_cells_potential * sizeof(Real), cudaMemcpyDeviceToHost);
 }
 
-void Potential_SOR_3D::Copy_Potential_From_Host( Real *output_potential ){
-  cudaMemcpy(  F.potential_d, output_potential, n_cells_potential*sizeof(Real), cudaMemcpyHostToDevice );
+void Potential_SOR_3D::Copy_Potential_From_Host(Real *output_potential)
+{
+  cudaMemcpy(F.potential_d, output_potential, n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice);
 }
 
-
-
-__global__ void Load_Transfer_Buffer_GPU_kernel( int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d   ){
-
+__global__ void Load_Transfer_Buffer_GPU_kernel_SOR(int direction, int side, int size_buffer, int n_i, int n_j, int nx,
+                                                    int ny, int nz, int n_ghost_transfer, int n_ghost_potential,
+                                                    Real *potential_d, Real *transfer_buffer_d)
+{
   // get a global thread ID
   int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot;
-  tid = threadIdx.x + blockIdx.x * blockDim.x;
-  tid_k = tid / (n_i*n_j);
-  tid_j = (tid - tid_k*n_i*n_j) / n_i;
-  tid_i = tid - tid_k*n_i*n_j - tid_j*n_i;
+  tid   = threadIdx.x + blockIdx.x * blockDim.x;
+  tid_k = tid / (n_i * n_j);
+  tid_j = (tid - tid_k * n_i * n_j) / n_i;
+  tid_i = tid - tid_k * n_i * n_j - tid_j * n_i;
 
-  if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer ) return;
+  if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer) return;
 
-  tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j;
+  tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j;
 
-  if ( direction == 0 ){
-    if ( side == 0 ) tid_pot = ( n_ghost_potential + tid_k  )                        + (tid_i)*nx + (tid_j)*nx*ny;
-    if ( side == 1 ) tid_pot = ( nx - n_ghost_potential - n_ghost_transfer + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny;
+  if (direction == 0) {
+    if (side == 0) tid_pot = (n_ghost_potential + tid_k) + (tid_i)*nx + (tid_j)*nx * ny;
+    if (side == 1) tid_pot = (nx - n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx + (tid_j)*nx * ny;
   }
-  if ( direction == 1 ){
-    if ( side == 0 ) tid_pot = (tid_i) + ( n_ghost_potential + tid_k  )*nx                         + (tid_j)*nx*ny;
-    if ( side == 1 ) tid_pot = (tid_i) + ( ny - n_ghost_potential - n_ghost_transfer + tid_k  )*nx + (tid_j)*nx*ny;
+  if (direction == 1) {
+    if (side == 0) tid_pot = (tid_i) + (n_ghost_potential + tid_k) * nx + (tid_j)*nx * ny;
+    if (side == 1) tid_pot = (tid_i) + (ny - n_ghost_potential - n_ghost_transfer + tid_k) * nx + (tid_j)*nx * ny;
   }
-  if ( direction == 2 ){
-    if ( side == 0 ) tid_pot = (tid_i) + (tid_j)*nx + ( n_ghost_potential + tid_k  )*nx*ny;
-    if ( side == 1 ) tid_pot = (tid_i) + (tid_j)*nx + ( nz - n_ghost_potential - n_ghost_transfer + tid_k  )*nx*ny;
+  if (direction == 2) {
+    if (side == 0) tid_pot = (tid_i) + (tid_j)*nx + (n_ghost_potential + tid_k) * nx * ny;
+    if (side == 1) tid_pot = (tid_i) + (tid_j)*nx + (nz - n_ghost_potential - n_ghost_transfer + tid_k) * nx * ny;
   }
   transfer_buffer_d[tid_buffer] = potential_d[tid_pot];
-
 }
 
-__global__ void Load_Transfer_Buffer_GPU_Half_kernel( int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d, int parity   ){
-
+__global__ void Load_Transfer_Buffer_GPU_Half_kernel(int direction, int side, int size_buffer, int n_i, int n_j, int nx,
+                                                     int ny, int nz, int n_ghost_transfer, int n_ghost_potential,
+                                                     Real *potential_d, Real *transfer_buffer_d, int parity)
+{
   // get a global thread ID
   int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot;
-  tid = threadIdx.x + blockIdx.x * blockDim.x;
-  tid_k = tid / (n_i*n_j);
-  tid_j = (tid - tid_k*n_i*n_j) / n_i;
-  tid_i = tid - tid_k*n_i*n_j - tid_j*n_i;
+  tid   = threadIdx.x + blockIdx.x * blockDim.x;
+  tid_k = tid / (n_i * n_j);
+  tid_j = (tid - tid_k * n_i * n_j) / n_i;
+  tid_i = tid - tid_k * n_i * n_j - tid_j * n_i;
 
-  tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j;
+  tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j;
 
   int nx_pot, ny_pot, nz_pot;
-  nx_pot = nx + 2*n_ghost_potential;
-  ny_pot = ny + 2*n_ghost_potential;
-  nz_pot = nz + 2*n_ghost_potential;
-
+  nx_pot = nx + 2 * n_ghost_potential;
+  ny_pot = ny + 2 * n_ghost_potential;
+  nz_pot = nz + 2 * n_ghost_potential;
 
   // // Make a checkboard 3D grid
   // tid_i = 2 * tid_i;
@@ -408,78 +420,80 @@ __global__ void Load_Transfer_Buffer_GPU_Half_kernel( int direction, int side, i
   // }
   // else if ( (tid_k+1)%2 == parity ) tid_i +=1;
 
-
-  if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer ) return;
+  if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer) return;
 
   tid_i += n_ghost_potential;
   tid_j += n_ghost_potential;
 
-
-  if ( direction == 0 ){
-    if ( side == 0 ) tid_pot = ( n_ghost_potential + tid_k  )                            + (tid_i)*nx_pot + (tid_j)*nx_pot*ny_pot;
-    if ( side == 1 ) tid_pot = ( nx_pot - n_ghost_potential - n_ghost_transfer + tid_k ) + (tid_i)*nx_pot + (tid_j)*nx_pot*ny_pot;
+  if (direction == 0) {
+    if (side == 0) tid_pot = (n_ghost_potential + tid_k) + (tid_i)*nx_pot + (tid_j)*nx_pot * ny_pot;
+    if (side == 1)
+      tid_pot = (nx_pot - n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx_pot + (tid_j)*nx_pot * ny_pot;
   }
-  if ( direction == 1 ){
-    if ( side == 0 ) tid_pot = (tid_i) + ( n_ghost_potential + tid_k  )*nx_pot                             + (tid_j)*nx_pot*ny_pot;
-    if ( side == 1 ) tid_pot = (tid_i) + ( ny_pot - n_ghost_potential - n_ghost_transfer + tid_k  )*nx_pot + (tid_j)*nx_pot*ny_pot;
+  if (direction == 1) {
+    if (side == 0) tid_pot = (tid_i) + (n_ghost_potential + tid_k) * nx_pot + (tid_j)*nx_pot * ny_pot;
+    if (side == 1)
+      tid_pot = (tid_i) + (ny_pot - n_ghost_potential - n_ghost_transfer + tid_k) * nx_pot + (tid_j)*nx_pot * ny_pot;
   }
-  if ( direction == 2 ){
-    if ( side == 0 ) tid_pot = (tid_i) + (tid_j)*nx_pot + ( n_ghost_potential + tid_k  )*nx_pot*ny_pot;
-    if ( side == 1 ) tid_pot = (tid_i) + (tid_j)*nx_pot + ( nz_pot - n_ghost_potential - n_ghost_transfer + tid_k  )*nx_pot*ny_pot;
+  if (direction == 2) {
+    if (side == 0) tid_pot = (tid_i) + (tid_j)*nx_pot + (n_ghost_potential + tid_k) * nx_pot * ny_pot;
+    if (side == 1)
+      tid_pot = (tid_i) + (tid_j)*nx_pot + (nz_pot - n_ghost_potential - n_ghost_transfer + tid_k) * nx_pot * ny_pot;
   }
 
-  // printf( "Loading Buffer Half:   val= %d    pot= %f \n", parity+1, potential_d[tid_pot]  );
+  // printf( "Loading Buffer Half:   val= %d    pot= %f \n", parity+1,
+  // potential_d[tid_pot]  );
   transfer_buffer_d[tid_buffer] = potential_d[tid_pot];
-
 }
 
-
-
-__global__ void Unload_Transfer_Buffer_GPU_kernel( int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d   ){
-
+__global__ void Unload_Transfer_Buffer_GPU_kernel_SOR(int direction, int side, int size_buffer, int n_i, int n_j,
+                                                      int nx, int ny, int nz, int n_ghost_transfer,
+                                                      int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d)
+{
   // get a global thread ID
   int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot;
-  tid = threadIdx.x + blockIdx.x * blockDim.x;
-  tid_k = tid / (n_i*n_j);
-  tid_j = (tid - tid_k*n_i*n_j) / n_i;
-  tid_i = tid - tid_k*n_i*n_j - tid_j*n_i;
+  tid   = threadIdx.x + blockIdx.x * blockDim.x;
+  tid_k = tid / (n_i * n_j);
+  tid_j = (tid - tid_k * n_i * n_j) / n_i;
+  tid_i = tid - tid_k * n_i * n_j - tid_j * n_i;
 
-  if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer ) return;
+  if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer) return;
 
-  tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j;
+  tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j;
 
-  if ( direction == 0 ){
-    if ( side == 0 ) tid_pot = ( n_ghost_potential - n_ghost_transfer + tid_k  ) + (tid_i)*nx + (tid_j)*nx*ny;
-    if ( side == 1 ) tid_pot = ( nx - n_ghost_potential + tid_k )                + (tid_i)*nx + (tid_j)*nx*ny;
+  if (direction == 0) {
+    if (side == 0) tid_pot = (n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx + (tid_j)*nx * ny;
+    if (side == 1) tid_pot = (nx - n_ghost_potential + tid_k) + (tid_i)*nx + (tid_j)*nx * ny;
   }
-  if ( direction == 1 ){
-    if ( side == 0 ) tid_pot = (tid_i) + ( n_ghost_potential - n_ghost_transfer + tid_k  )*nx + (tid_j)*nx*ny;
-    if ( side == 1 ) tid_pot = (tid_i) + ( ny - n_ghost_potential + tid_k  )*nx               + (tid_j)*nx*ny;
+  if (direction == 1) {
+    if (side == 0) tid_pot = (tid_i) + (n_ghost_potential - n_ghost_transfer + tid_k) * nx + (tid_j)*nx * ny;
+    if (side == 1) tid_pot = (tid_i) + (ny - n_ghost_potential + tid_k) * nx + (tid_j)*nx * ny;
   }
-  if ( direction == 2 ){
-    if ( side == 0 ) tid_pot = (tid_i) + (tid_j)*nx + ( n_ghost_potential - n_ghost_transfer + tid_k  )*nx*ny;
-    if ( side == 1 ) tid_pot = (tid_i) + (tid_j)*nx + ( nz - n_ghost_potential + tid_k  )*nx*ny;
+  if (direction == 2) {
+    if (side == 0) tid_pot = (tid_i) + (tid_j)*nx + (n_ghost_potential - n_ghost_transfer + tid_k) * nx * ny;
+    if (side == 1) tid_pot = (tid_i) + (tid_j)*nx + (nz - n_ghost_potential + tid_k) * nx * ny;
   }
   potential_d[tid_pot] = transfer_buffer_d[tid_buffer];
-
 }
 
-
-__global__ void Unload_Transfer_Buffer_GPU_Half_kernel( int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d, int parity   ){
-
+__global__ void Unload_Transfer_Buffer_GPU_Half_kernel(int direction, int side, int size_buffer, int n_i, int n_j,
+                                                       int nx, int ny, int nz, int n_ghost_transfer,
+                                                       int n_ghost_potential, Real *potential_d,
+                                                       Real *transfer_buffer_d, int parity)
+{
   // get a global thread ID
   int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot;
-  tid = threadIdx.x + blockIdx.x * blockDim.x;
-  tid_k = tid / (n_i*n_j);
-  tid_j = (tid - tid_k*n_i*n_j) / n_i;
-  tid_i = tid - tid_k*n_i*n_j - tid_j*n_i;
+  tid   = threadIdx.x + blockIdx.x * blockDim.x;
+  tid_k = tid / (n_i * n_j);
+  tid_j = (tid - tid_k * n_i * n_j) / n_i;
+  tid_i = tid - tid_k * n_i * n_j - tid_j * n_i;
 
-  tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j;
+  tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j;
 
   int nx_pot, ny_pot, nz_pot;
-  nx_pot = nx + 2*n_ghost_potential;
-  ny_pot = ny + 2*n_ghost_potential;
-  nz_pot = nz + 2*n_ghost_potential;
+  nx_pot = nx + 2 * n_ghost_potential;
+  ny_pot = ny + 2 * n_ghost_potential;
+  nz_pot = nz + 2 * n_ghost_potential;
 
   // // Make a checkboard 3D grid
   // tid_i = 2 * tid_i;
@@ -488,47 +502,45 @@ __global__ void Unload_Transfer_Buffer_GPU_Half_kernel( int direction, int side,
   // }
   // else if ( (tid_k+1)%2 == parity ) tid_i +=1;
 
-
-  if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer ) return;
+  if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer) return;
 
   tid_i += n_ghost_potential;
   tid_j += n_ghost_potential;
 
-
-  if ( direction == 0 ){
-    if ( side == 0 ) tid_pot = ( n_ghost_potential - n_ghost_transfer + tid_k  )     + (tid_i)*nx_pot + (tid_j)*nx_pot*ny_pot;
-    if ( side == 1 ) tid_pot = ( nx_pot - n_ghost_potential + tid_k )                + (tid_i)*nx_pot + (tid_j)*nx_pot*ny_pot;
+  if (direction == 0) {
+    if (side == 0) tid_pot = (n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx_pot + (tid_j)*nx_pot * ny_pot;
+    if (side == 1) tid_pot = (nx_pot - n_ghost_potential + tid_k) + (tid_i)*nx_pot + (tid_j)*nx_pot * ny_pot;
   }
-  if ( direction == 1 ){
-    if ( side == 0 ) tid_pot = (tid_i) + ( n_ghost_potential - n_ghost_transfer + tid_k  )*nx_pot     + (tid_j)*nx_pot*ny_pot;
-    if ( side == 1 ) tid_pot = (tid_i) + ( ny_pot - n_ghost_potential + tid_k  )*nx_pot               + (tid_j)*nx_pot*ny_pot;
+  if (direction == 1) {
+    if (side == 0)
+      tid_pot = (tid_i) + (n_ghost_potential - n_ghost_transfer + tid_k) * nx_pot + (tid_j)*nx_pot * ny_pot;
+    if (side == 1) tid_pot = (tid_i) + (ny_pot - n_ghost_potential + tid_k) * nx_pot + (tid_j)*nx_pot * ny_pot;
   }
-  if ( direction == 2 ){
-    if ( side == 0 ) tid_pot = (tid_i) + (tid_j)*nx_pot + ( n_ghost_potential - n_ghost_transfer + tid_k  )*nx_pot*ny_pot;
-    if ( side == 1 ) tid_pot = (tid_i) + (tid_j)*nx_pot + ( nz_pot - n_ghost_potential + tid_k  )*nx_pot*ny_pot;
+  if (direction == 2) {
+    if (side == 0)
+      tid_pot = (tid_i) + (tid_j)*nx_pot + (n_ghost_potential - n_ghost_transfer + tid_k) * nx_pot * ny_pot;
+    if (side == 1) tid_pot = (tid_i) + (tid_j)*nx_pot + (nz_pot - n_ghost_potential + tid_k) * nx_pot * ny_pot;
   }
   potential_d[tid_pot] = transfer_buffer_d[tid_buffer];
-
 }
 
-
-
-void Potential_SOR_3D::Load_Transfer_Buffer_GPU( int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d  ){
-
+void Potential_SOR_3D::Load_Transfer_Buffer_GPU(int direction, int side, int nx, int ny, int nz, int n_ghost_transfer,
+                                                int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d)
+{
   int nx_pot, ny_pot, nz_pot, size_buffer, n_i, n_j, ngrid;
-  nx_pot = nx + 2*n_ghost_potential;
-  ny_pot = ny + 2*n_ghost_potential;
-  nz_pot = nz + 2*n_ghost_potential;
+  nx_pot = nx + 2 * n_ghost_potential;
+  ny_pot = ny + 2 * n_ghost_potential;
+  nz_pot = nz + 2 * n_ghost_potential;
 
-  if ( direction == 0 ){
+  if (direction == 0) {
     n_i = ny_pot;
     n_j = nz_pot;
   }
-  if ( direction == 1 ){
+  if (direction == 1) {
     n_i = nx_pot;
     n_j = nz_pot;
   }
-  if ( direction == 2 ){
+  if (direction == 2) {
     n_i = nx_pot;
     n_j = ny_pot;
   }
@@ -536,33 +548,36 @@ void Potential_SOR_3D::Load_Transfer_Buffer_GPU( int direction, int side, int nx
   size_buffer = n_ghost_transfer * n_i * n_j;
 
   // set values for GPU kernels
-  ngrid = ( size_buffer - 1 ) / TPB_SOR + 1;
+  ngrid = (size_buffer - 1) / TPB_SOR + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(ngrid, 1, 1);
   //  number of threads per 1D block
   dim3 dim1dBlock(TPB_SOR, 1, 1);
 
-
-  // Load_Transfer_Buffer_GPU_kernel<<<dim1dGrid,dim1dBlock>>>( direction, side, size_buffer, n_i, n_j,  nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d  );
-  hipLaunchKernelGGL( Load_Transfer_Buffer_GPU_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j,  nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d  );
-
+  // Load_Transfer_Buffer_GPU_kernel<<<dim1dGrid,dim1dBlock>>>( direction, side,
+  // size_buffer, n_i, n_j,  nx_pot, ny_pot, nz_pot, n_ghost_transfer,
+  // n_ghost_potential, potential_d, transfer_buffer_d  );
+  hipLaunchKernelGGL(Load_Transfer_Buffer_GPU_kernel_SOR, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer,
+                     n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d,
+                     transfer_buffer_d);
 }
 
-
-void Potential_SOR_3D::Load_Transfer_Buffer_Half_GPU( int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d  ){
-
+void Potential_SOR_3D::Load_Transfer_Buffer_Half_GPU(int direction, int side, int nx, int ny, int nz,
+                                                     int n_ghost_transfer, int n_ghost_potential, Real *potential_d,
+                                                     Real *transfer_buffer_d)
+{
   int size_buffer, n_i, n_j, ngrid;
-  nz_pot = nz + 2*n_ghost_potential;
+  nz_pot = nz + 2 * n_ghost_potential;
 
-  if ( direction == 0 ){
+  if (direction == 0) {
     n_i = ny;
     n_j = nz;
   }
-  if ( direction == 1 ){
+  if (direction == 1) {
     n_i = nx;
     n_j = nz;
   }
-  if ( direction == 2 ){
+  if (direction == 2) {
     n_i = nx;
     n_j = ny;
   }
@@ -571,34 +586,37 @@ void Potential_SOR_3D::Load_Transfer_Buffer_Half_GPU( int direction, int side, i
   size_buffer = n_ghost_transfer * n_i * n_j;
 
   // set values for GPU kernels
-  ngrid = ( size_buffer - 1 ) / TPB_SOR + 1;
+  ngrid = (size_buffer - 1) / TPB_SOR + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(ngrid, 1, 1);
   //  number of threads per 1D block
   dim3 dim1dBlock(TPB_SOR, 1, 1);
 
-
-  // Load_Transfer_Buffer_GPU_Half_kernel<<<dim1dGrid,dim1dBlock>>>( direction, side, size_buffer, n_i, n_j,  nx, ny, nz, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d, iteration_parity  );
-  hipLaunchKernelGGL(Load_Transfer_Buffer_GPU_Half_kernel, dim1dGrid, dim1dBlock, 0, 0,  direction, side, size_buffer, n_i, n_j,  nx, ny, nz, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d, iteration_parity  );
-
+  // Load_Transfer_Buffer_GPU_Half_kernel<<<dim1dGrid,dim1dBlock>>>( direction,
+  // side, size_buffer, n_i, n_j,  nx, ny, nz, n_ghost_transfer,
+  // n_ghost_potential, potential_d, transfer_buffer_d, iteration_parity  );
+  hipLaunchKernelGGL(Load_Transfer_Buffer_GPU_Half_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer,
+                     n_i, n_j, nx, ny, nz, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d,
+                     iteration_parity);
 }
 
-void Potential_SOR_3D::Unload_Transfer_Buffer_GPU( int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d  ){
-
+void Potential_SOR_3D::Unload_Transfer_Buffer_GPU(int direction, int side, int nx, int ny, int nz, int n_ghost_transfer,
+                                                  int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d)
+{
   int nx_pot, ny_pot, nz_pot, size_buffer, n_i, n_j, ngrid;
-  nx_pot = nx + 2*n_ghost_potential;
-  ny_pot = ny + 2*n_ghost_potential;
-  nz_pot = nz + 2*n_ghost_potential;
+  nx_pot = nx + 2 * n_ghost_potential;
+  ny_pot = ny + 2 * n_ghost_potential;
+  nz_pot = nz + 2 * n_ghost_potential;
 
-  if ( direction == 0 ){
+  if (direction == 0) {
     n_i = ny_pot;
     n_j = nz_pot;
   }
-  if ( direction == 1 ){
+  if (direction == 1) {
     n_i = nx_pot;
     n_j = nz_pot;
   }
-  if ( direction == 2 ){
+  if (direction == 2) {
     n_i = nx_pot;
     n_j = ny_pot;
   }
@@ -606,32 +624,35 @@ void Potential_SOR_3D::Unload_Transfer_Buffer_GPU( int direction, int side, int
   size_buffer = n_ghost_transfer * n_i * n_j;
 
   // set values for GPU kernels
-  ngrid = ( size_buffer - 1 ) / TPB_SOR + 1;
+  ngrid = (size_buffer - 1) / TPB_SOR + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(ngrid, 1, 1);
   //  number of threads per 1D block
   dim3 dim1dBlock(TPB_SOR, 1, 1);
 
-
-  // Unload_Transfer_Buffer_GPU_kernel<<<dim1dGrid,dim1dBlock>>>( direction, side, size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d  );
-  hipLaunchKernelGGL(Unload_Transfer_Buffer_GPU_kernel,dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d  );
-
+  // Unload_Transfer_Buffer_GPU_kernel<<<dim1dGrid,dim1dBlock>>>( direction,
+  // side, size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer,
+  // n_ghost_potential, potential_d, transfer_buffer_d  );
+  hipLaunchKernelGGL(Unload_Transfer_Buffer_GPU_kernel_SOR, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer,
+                     n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d,
+                     transfer_buffer_d);
 }
 
-
-void Potential_SOR_3D::Unload_Transfer_Buffer_Half_GPU( int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d  ){
-
+void Potential_SOR_3D::Unload_Transfer_Buffer_Half_GPU(int direction, int side, int nx, int ny, int nz,
+                                                       int n_ghost_transfer, int n_ghost_potential, Real *potential_d,
+                                                       Real *transfer_buffer_d)
+{
   int size_buffer, n_i, n_j, ngrid;
 
-  if ( direction == 0 ){
+  if (direction == 0) {
     n_i = ny;
     n_j = nz;
   }
-  if ( direction == 1 ){
+  if (direction == 1) {
     n_i = nx;
     n_j = nz;
   }
-  if ( direction == 2 ){
+  if (direction == 2) {
     n_i = nx;
     n_j = ny;
   }
@@ -640,31 +661,27 @@ void Potential_SOR_3D::Unload_Transfer_Buffer_Half_GPU( int direction, int side,
   size_buffer = n_ghost_transfer * n_i * n_j;
 
   // set values for GPU kernels
-  ngrid = ( size_buffer - 1 ) / TPB_SOR + 1;
+  ngrid = (size_buffer - 1) / TPB_SOR + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(ngrid, 1, 1);
   //  number of threads per 1D block
   dim3 dim1dBlock(TPB_SOR, 1, 1);
 
-
-  // Unload_Transfer_Buffer_GPU_Half_kernel<<<dim1dGrid,dim1dBlock>>>( direction, side, size_buffer, n_i, n_j, nx, ny, nz, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d, iteration_parity  );
-  hipLaunchKernelGGL(Unload_Transfer_Buffer_GPU_Half_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, nx, ny, nz, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d, iteration_parity);
-
+  // Unload_Transfer_Buffer_GPU_Half_kernel<<<dim1dGrid,dim1dBlock>>>(
+  // direction, side, size_buffer, n_i, n_j, nx, ny, nz, n_ghost_transfer,
+  // n_ghost_potential, potential_d, transfer_buffer_d, iteration_parity  );
+  hipLaunchKernelGGL(Unload_Transfer_Buffer_GPU_Half_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer,
+                     n_i, n_j, nx, ny, nz, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d,
+                     iteration_parity);
 }
-void Potential_SOR_3D::Copy_Transfer_Buffer_To_Host( int size_buffer, Real *transfer_buffer_h, Real *transfer_buffer_d ){
-  CudaSafeCall( cudaMemcpy(transfer_buffer_h, transfer_buffer_d, size_buffer*sizeof(Real), cudaMemcpyDeviceToHost ) );
+void Potential_SOR_3D::Copy_Transfer_Buffer_To_Host(int size_buffer, Real *transfer_buffer_h, Real *transfer_buffer_d)
+{
+  GPU_Error_Check(cudaMemcpy(transfer_buffer_h, transfer_buffer_d, size_buffer * sizeof(Real), cudaMemcpyDeviceToHost));
 }
 
-
-void Potential_SOR_3D::Copy_Transfer_Buffer_To_Device( int size_buffer, Real *transfer_buffer_h, Real *transfer_buffer_d ){
-  CudaSafeCall( cudaMemcpy(transfer_buffer_d, transfer_buffer_h, size_buffer*sizeof(Real), cudaMemcpyHostToDevice ) );
+void Potential_SOR_3D::Copy_Transfer_Buffer_To_Device(int size_buffer, Real *transfer_buffer_h, Real *transfer_buffer_d)
+{
+  GPU_Error_Check(cudaMemcpy(transfer_buffer_d, transfer_buffer_h, size_buffer * sizeof(Real), cudaMemcpyHostToDevice));
 }
 
-
-#endif //GRAVITY
-
-
-
-
-
-
+#endif  // GRAVITY
diff --git a/src/gravity/potential_paris_3D.cu b/src/gravity/potential_paris_3D.cu
index 6c9ec503c..c3a66ae9e 100644
--- a/src/gravity/potential_paris_3D.cu
+++ b/src/gravity/potential_paris_3D.cu
@@ -1,79 +1,86 @@
 #if defined(GRAVITY) && defined(PARIS)
 
-#include "../gravity/potential_paris_3D.h"
-#include "../utils/gpu.hpp"
-#include "../io/io.h"
-#include <cassert>
-#include <cfloat>
-#include <climits>
-
-static void __attribute__((unused)) printDiff(const Real *p, const Real *q, const int ng, const int nx, const int ny, const int nz, const bool plot = false)
+  #include <cassert>
+  #include <cfloat>
+  #include <climits>
+
+  #include "../gravity/potential_paris_3D.h"
+  #include "../io/io.h"
+  #include "../utils/gpu.hpp"
+
+static void __attribute__((unused)) Print_Diff(const Real *p, const Real *q, const int ng, const int nx, const int ny,
+                                               const int nz, const bool plot = false)
 {
   Real dMax = 0, dSum = 0, dSum2 = 0;
   Real qMax = 0, qSum = 0, qSum2 = 0;
-#pragma omp parallel for reduction(max:dMax,qMax) reduction(+:dSum,dSum2,qSum,qSum2)
+  #pragma omp parallel for reduction(max : dMax, qMax) reduction(+ : dSum, dSum2, qSum, qSum2)
   for (int k = 0; k < nz; k++) {
     for (int j = 0; j < ny; j++) {
       for (int i = 0; i < nx; i++) {
-        const int ijk = i+ng+(nx+ng+ng)*(j+ng+(ny+ng+ng)*(k+ng));
+        const int ijk   = i + ng + (nx + ng + ng) * (j + ng + (ny + ng + ng) * (k + ng));
         const Real qAbs = fabs(q[ijk]);
-        qMax = std::max(qMax,qAbs);
+        qMax            = std::max(qMax, qAbs);
         qSum += qAbs;
-        qSum2 += qAbs*qAbs;
-        const Real d = fabs(q[ijk]-p[ijk]);
-        dMax = std::max(dMax,d);
+        qSum2 += qAbs * qAbs;
+        const Real d = fabs(q[ijk] - p[ijk]);
+        dMax         = std::max(dMax, d);
         dSum += d;
-        dSum2 += d*d;
+        dSum2 += d * d;
       }
     }
   }
-  Real maxs[2] = {qMax,dMax};
-  Real sums[4] = {qSum,qSum2,dSum,dSum2};
-  MPI_Allreduce(MPI_IN_PLACE,&maxs,2,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD);
-  MPI_Allreduce(MPI_IN_PLACE,&sums,4,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
-  chprintf(" Poisson-Solver Diff: L1 %g L2 %g Linf %g\n",sums[2]/sums[0],sqrt(sums[3]/sums[1]),maxs[1]/maxs[0]);
+  Real maxs[2] = {qMax, dMax};
+  Real sums[4] = {qSum, qSum2, dSum, dSum2};
+  MPI_Allreduce(MPI_IN_PLACE, &maxs, 2, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+  MPI_Allreduce(MPI_IN_PLACE, &sums, 4, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  chprintf(" Poisson-Solver Diff: L1 %g L2 %g Linf %g\n", sums[2] / sums[0], sqrt(sums[3] / sums[1]),
+           maxs[1] / maxs[0]);
   fflush(stdout);
-  if (!plot) return;
+  if (!plot) {
+    return;
+  }
 
   printf("###\n");
-  const int k = nz/2;
-  //for (int j = 0; j < ny; j++) {
-  const int j = ny/2;
-    for (int i = 0; i < nx; i++) {
-      const int ijk = i+ng+(nx+ng+ng)*(j+ng+(ny+ng+ng)*(k+ng));
-      //printf("%d %d %g %g %g\n",j,i,q[ijk],p[ijk],q[ijk]-p[ijk]);
-      printf("%d %g %g %g\n",i,q[ijk],p[ijk],q[ijk]-p[ijk]);
-    }
-    printf("\n");
+  const int k = nz / 2;
+  // for (int j = 0; j < ny; j++) {
+  const int j = ny / 2;
+  for (int i = 0; i < nx; i++) {
+    const int ijk = i + ng + (nx + ng + ng) * (j + ng + (ny + ng + ng) * (k + ng));
+    // printf("%d %d %g %g %g\n",j,i,q[ijk],p[ijk],q[ijk]-p[ijk]);
+    printf("%d %g %g %g\n", i, q[ijk], p[ijk], q[ijk] - p[ijk]);
+  }
+  printf("\n");
   //}
 
   MPI_Finalize();
   exit(0);
 }
 
-Potential_Paris_3D::Potential_Paris_3D():
-  dn_{0,0,0},
-  dr_{0,0,0},
-  lo_{0,0,0},
-  lr_{0,0,0},
-  myLo_{0,0,0},
-  pp_(nullptr),
-  minBytes_(0),
-  densityBytes_(0),
-  potentialBytes_(0),
-  da_(nullptr),
-  db_(nullptr)
-{}
-
-Potential_Paris_3D::~Potential_Paris_3D() { Reset(); }
-
-void Potential_Paris_3D::Get_Potential(const Real *const density, Real *const potential, const Real g, const Real offset, const Real a)
+PotentialParis3D::PotentialParis3D()
+    : dn_{0, 0, 0},
+      dr_{0, 0, 0},
+      lo_{0, 0, 0},
+      lr_{0, 0, 0},
+      myLo_{0, 0, 0},
+      pp_(nullptr),
+      minBytes_(0),
+      densityBytes_(0),
+      potentialBytes_(0),
+      da_(nullptr),
+      db_(nullptr)
 {
-#ifdef COSMOLOGY
-  const Real scale = Real(4)*M_PI*g/a;
-#else
-  const Real scale = Real(4)*M_PI*g;
-#endif
+}
+
+PotentialParis3D::~PotentialParis3D() { Reset(); }
+
+void PotentialParis3D::Get_Potential(const Real *const density, Real *const potential, const Real g, const Real offset,
+                                     const Real a)
+{
+  #ifdef COSMOLOGY
+  const Real scale = Real(4) * M_PI * g / a;
+  #else
+  const Real scale = Real(4) * M_PI * g;
+  #endif
   assert(da_);
   Real *const da = da_;
   Real *const db = db_;
@@ -83,45 +90,47 @@ void Potential_Paris_3D::Get_Potential(const Real *const density, Real *const po
   const int nj = dn_[1];
   const int nk = dn_[0];
 
-  const int n = ni*nj*nk;
+  const int n = ni * nj * nk;
   #ifdef GRAVITY_GPU
-  CHECK(cudaMemcpy(db,density,densityBytes_,cudaMemcpyDeviceToDevice));
+  GPU_Error_Check(cudaMemcpy(db, density, densityBytes_, cudaMemcpyDeviceToDevice));
   #else
-  CHECK(cudaMemcpy(db,density,densityBytes_,cudaMemcpyHostToDevice));
+  GPU_Error_Check(cudaMemcpy(db, density, densityBytes_, cudaMemcpyHostToDevice));
   #endif
-  const int ngi = ni+N_GHOST_POTENTIAL+N_GHOST_POTENTIAL;
-  const int ngj = nj+N_GHOST_POTENTIAL+N_GHOST_POTENTIAL;
+  const int ngi = ni + N_GHOST_POTENTIAL + N_GHOST_POTENTIAL;
+  const int ngj = nj + N_GHOST_POTENTIAL + N_GHOST_POTENTIAL;
 
-  gpuFor(n,GPU_LAMBDA(const int i) { db[i] = scale*(db[i]-offset); });
-  pp_->solve(minBytes_,db,da);
   gpuFor(
-    nk,nj,ni,
-    GPU_LAMBDA(const int k, const int j, const int i) {
-      const int ia = i+ni*(j+nj*k);
-      const int ib = i+N_GHOST_POTENTIAL+ngi*(j+N_GHOST_POTENTIAL+ngj*(k+N_GHOST_POTENTIAL));
-      db[ib] = da[ia];
-    });
+      n, GPU_LAMBDA(const int i) { db[i] = scale * (db[i] - offset); });
+  pp_->solve(minBytes_, db, da);
+  gpuFor(
+      nk, nj, ni, GPU_LAMBDA(const int k, const int j, const int i) {
+        const int ia = i + ni * (j + nj * k);
+        const int ib = i + N_GHOST_POTENTIAL + ngi * (j + N_GHOST_POTENTIAL + ngj * (k + N_GHOST_POTENTIAL));
+        db[ib]       = da[ia];
+      });
 
   assert(potential);
   #ifdef GRAVITY_GPU
-  CHECK(cudaMemcpy(potential,db,potentialBytes_,cudaMemcpyDeviceToDevice));
+  GPU_Error_Check(cudaMemcpy(potential, db, potentialBytes_, cudaMemcpyDeviceToDevice));
   #else
-  CHECK(cudaMemcpy(potential,db,potentialBytes_,cudaMemcpyDeviceToHost));
+  GPU_Error_Check(cudaMemcpy(potential, db, potentialBytes_, cudaMemcpyDeviceToHost));
   #endif
 }
 
-void Potential_Paris_3D::Initialize(const Real lx, const Real ly, const Real lz, const Real xMin, const Real yMin, const Real zMin, const int nx, const int ny, const int nz, const int nxReal, const int nyReal, const int nzReal, const Real dx, const Real dy, const Real dz)
+void PotentialParis3D::Initialize(const Real lx, const Real ly, const Real lz, const Real xMin, const Real yMin,
+                                  const Real zMin, const int nx, const int ny, const int nz, const int nxReal,
+                                  const int nyReal, const int nzReal, const Real dx, const Real dy, const Real dz)
 {
   chprintf(" Using Poisson Solver: Paris Periodic");
-#ifdef PARIS_5PT
+  #ifdef PARIS_5PT
   chprintf(" 5-Point\n");
-#elif defined PARIS_3PT
+  #elif defined PARIS_3PT
   chprintf(" 3-Point\n");
-#else
+  #else
   chprintf(" Spectral\n");
-#endif
+  #endif
 
-  const long nl012 = long(nxReal)*long(nyReal)*long(nzReal);
+  const long nl012 = long(nxReal) * long(nyReal) * long(nzReal);
   assert(nl012 <= INT_MAX);
 
   dn_[0] = nzReal;
@@ -139,43 +148,53 @@ void Potential_Paris_3D::Initialize(const Real lx, const Real ly, const Real lz,
   myLo_[0] = zMin;
   myLo_[1] = yMin;
   myLo_[2] = xMin;
-  MPI_Allreduce(myLo_,lo_,3,MPI_DOUBLE,MPI_MIN,MPI_COMM_WORLD);
-
-  const Real hi[3] = {lo_[0]+lz-dr_[0],lo_[1]+ly-dr_[1],lo_[2]+lx-dr_[2]};
-  const int n[3] = {nz,ny,nx};
-  const int m[3] = {n[0]/nzReal,n[1]/nyReal,n[2]/nxReal};
-  const int id[3] = {int(round((zMin-lo_[0])/(dn_[0]*dr_[0]))),int(round((yMin-lo_[1])/(dn_[1]*dr_[1]))),int(round((xMin-lo_[2])/(dn_[2]*dr_[2])))};
-  chprintf("  Paris: [ %g %g %g ]-[ %g %g %g ] N_local[ %d %d %d ] Tasks[ %d %d %d ]\n",lo_[2],lo_[1],lo_[0],lo_[2]+lx,lo_[1]+ly,lo_[0]+lz,dn_[2],dn_[1],dn_[0],m[2],m[1],m[0]);
-
-  assert(dn_[0] == n[0]/m[0]);
-  assert(dn_[1] == n[1]/m[1]);
-  assert(dn_[2] == n[2]/m[2]);
-
-  pp_ = new ParisPeriodic(n,lo_,hi,m,id);
+  MPI_Allreduce(myLo_, lo_, 3, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
+
+  const Real hi[3] = {lo_[0] + lz - dr_[0], lo_[1] + ly - dr_[1], lo_[2] + lx - dr_[2]};
+  const int n[3]   = {nz, ny, nx};
+  const int m[3]   = {n[0] / nzReal, n[1] / nyReal, n[2] / nxReal};
+  const int id[3]  = {int(round((zMin - lo_[0]) / (dn_[0] * dr_[0]))), int(round((yMin - lo_[1]) / (dn_[1] * dr_[1]))),
+                      int(round((xMin - lo_[2]) / (dn_[2] * dr_[2])))};
+  chprintf(
+      "  Paris: [ %g %g %g ]-[ %g %g %g ] N_local[ %d %d %d ] Tasks[ %d %d %d "
+      "]\n",
+      lo_[2], lo_[1], lo_[0], lo_[2] + lx, lo_[1] + ly, lo_[0] + lz, dn_[2], dn_[1], dn_[0], m[2], m[1], m[0]);
+
+  assert(dn_[0] == n[0] / m[0]);
+  assert(dn_[1] == n[1] / m[1]);
+  assert(dn_[2] == n[2] / m[2]);
+
+  pp_ = new ParisPeriodic(n, lo_, hi, m, id);
   assert(pp_);
-  minBytes_ = pp_->bytes();
-  densityBytes_ = long(sizeof(Real))*dn_[0]*dn_[1]*dn_[2];
-  const long gg = N_GHOST_POTENTIAL+N_GHOST_POTENTIAL;
-  potentialBytes_ = long(sizeof(Real))*(dn_[0]+gg)*(dn_[1]+gg)*(dn_[2]+gg);
+  minBytes_       = pp_->bytes();
+  densityBytes_   = long(sizeof(Real)) * dn_[0] * dn_[1] * dn_[2];
+  const long gg   = N_GHOST_POTENTIAL + N_GHOST_POTENTIAL;
+  potentialBytes_ = long(sizeof(Real)) * (dn_[0] + gg) * (dn_[1] + gg) * (dn_[2] + gg);
 
-  CHECK(cudaMalloc(reinterpret_cast<void **>(&da_),std::max(minBytes_,densityBytes_)));
+  GPU_Error_Check(cudaMalloc(reinterpret_cast<void **>(&da_), std::max(minBytes_, densityBytes_)));
   assert(da_);
 
-  CHECK(cudaMalloc(reinterpret_cast<void **>(&db_),std::max(minBytes_,potentialBytes_)));
+  GPU_Error_Check(cudaMalloc(reinterpret_cast<void **>(&db_), std::max(minBytes_, potentialBytes_)));
   assert(db_);
 }
 
-void Potential_Paris_3D::Reset()
+void PotentialParis3D::Reset()
 {
-  if (db_) CHECK(cudaFree(db_));
+  if (db_) {
+    GPU_Error_Check(cudaFree(db_));
+  }
   db_ = nullptr;
 
-  if (da_) CHECK(cudaFree(da_));
+  if (da_) {
+    GPU_Error_Check(cudaFree(da_));
+  }
   da_ = nullptr;
 
   potentialBytes_ = densityBytes_ = minBytes_ = 0;
 
-  if (pp_) delete pp_;
+  if (pp_) {
+    delete pp_;
+  }
   pp_ = nullptr;
 
   myLo_[2] = myLo_[1] = myLo_[0] = 0;
diff --git a/src/gravity/potential_paris_3D.h b/src/gravity/potential_paris_3D.h
index b6d85d5d2..be80c4116 100644
--- a/src/gravity/potential_paris_3D.h
+++ b/src/gravity/potential_paris_3D.h
@@ -2,25 +2,28 @@
 
 #if defined(GRAVITY) && defined(PARIS)
 
-#include "paris/ParisPeriodic.hpp"
-#include "../global/global.h"
+  #include "../global/global.h"
+  #include "paris/ParisPeriodic.hpp"
 
-class Potential_Paris_3D {
-  public:
-    Potential_Paris_3D();
-    ~Potential_Paris_3D();
-    void Get_Potential(const Real *density, Real *potential, Real g, Real massInfo, Real a);
-    void Initialize(Real lx, Real ly, Real lz, Real xMin, Real yMin, Real zMin, int nx, int ny, int nz, int nxReal, int nyReal, int nzReal, Real dx, Real dy, Real dz);
-    void Reset();
-  protected:
-    int dn_[3];
-    Real dr_[3],lo_[3],lr_[3],myLo_[3];
-    ParisPeriodic *pp_;
-    long minBytes_;
-    long densityBytes_;
-    long potentialBytes_;
-    Real *da_;
-    Real *db_;
+class PotentialParis3D
+{
+ public:
+  PotentialParis3D();
+  ~PotentialParis3D();
+  void Get_Potential(const Real *density, Real *potential, Real g, Real massInfo, Real a);
+  void Initialize(Real lx, Real ly, Real lz, Real xMin, Real yMin, Real zMin, int nx, int ny, int nz, int nxReal,
+                  int nyReal, int nzReal, Real dx, Real dy, Real dz);
+  void Reset();
+
+ protected:
+  int dn_[3];
+  Real dr_[3], lo_[3], lr_[3], myLo_[3];
+  ParisPeriodic *pp_;
+  long minBytes_;
+  long densityBytes_;
+  long potentialBytes_;
+  Real *da_;
+  Real *db_;
 };
 
 #endif
diff --git a/src/gravity/potential_paris_galactic.cu b/src/gravity/potential_paris_galactic.cu
index db53ea31a..fbb38df28 100644
--- a/src/gravity/potential_paris_galactic.cu
+++ b/src/gravity/potential_paris_galactic.cu
@@ -1,32 +1,36 @@
 #ifdef PARIS_GALACTIC
 
-#include "../gravity/potential_paris_galactic.h"
-#include "../io/io.h"
-#include "../utils/gpu.hpp"
-#include <cassert>
-
-Potential_Paris_Galactic::Potential_Paris_Galactic():
-  dn_{0,0,0},
-  dr_{0,0,0},
-  lo_{0,0,0},
-  lr_{0,0,0},
-  myLo_{0,0,0},
-  pp_(nullptr),
-  densityBytes_(0),
-  minBytes_(0),
-  da_(nullptr),
-  db_(nullptr)
-#ifndef GRAVITY_GPU
-  , potentialBytes_(0),
-  dc_(nullptr)
-#endif
-{}
+  #include <cassert>
+
+  #include "../gravity/potential_paris_galactic.h"
+  #include "../io/io.h"
+  #include "../utils/gpu.hpp"
+
+PotentialParisGalactic::PotentialParisGalactic()
+    : dn_{0, 0, 0},
+      dr_{0, 0, 0},
+      lo_{0, 0, 0},
+      lr_{0, 0, 0},
+      myLo_{0, 0, 0},
+      pp_(nullptr),
+      densityBytes_(0),
+      minBytes_(0),
+      da_(nullptr),
+      db_(nullptr)
+  #ifndef GRAVITY_GPU
+      ,
+      potentialBytes_(0),
+      dc_(nullptr)
+  #endif
+{
+}
 
-Potential_Paris_Galactic::~Potential_Paris_Galactic() { Reset(); }
+PotentialParisGalactic::~PotentialParisGalactic() { Reset(); }
 
-void Potential_Paris_Galactic::Get_Potential(const Real *const density, Real *const potential, const Real g, const DiskGalaxy &galaxy)
+void PotentialParisGalactic::Get_Potential(const Real *const density, Real *const potential, const Real g,
+                                           const DiskGalaxy &galaxy)
 {
-  const Real scale = Real(4)*M_PI*g;
+  const Real scale = Real(4) * M_PI * g;
 
   assert(da_);
   Real *const da = da_;
@@ -37,18 +41,18 @@ void Potential_Paris_Galactic::Get_Potential(const Real *const density, Real *co
   const int nj = dn_[1];
   const int nk = dn_[0];
 
-  const int ngi = ni+N_GHOST_POTENTIAL+N_GHOST_POTENTIAL;
-  const int ngj = nj+N_GHOST_POTENTIAL+N_GHOST_POTENTIAL;
+  const int ngi = ni + N_GHOST_POTENTIAL + N_GHOST_POTENTIAL;
+  const int ngj = nj + N_GHOST_POTENTIAL + N_GHOST_POTENTIAL;
 
-#ifdef GRAVITY_GPU
+  #ifdef GRAVITY_GPU
   const Real *const rho = density;
-  Real *const phi = potential;
-#else
-  CHECK(cudaMemcpyAsync(da,density,densityBytes_,cudaMemcpyHostToDevice,0));
-  CHECK(cudaMemcpyAsync(dc_,potential,potentialBytes_,cudaMemcpyHostToDevice,0));
+  Real *const phi       = potential;
+  #else
+  GPU_Error_Check(cudaMemcpyAsync(da, density, densityBytes_, cudaMemcpyHostToDevice, 0));
+  GPU_Error_Check(cudaMemcpyAsync(dc_, potential, potentialBytes_, cudaMemcpyHostToDevice, 0));
   const Real *const rho = da;
-  Real *const phi = dc_;
-#endif
+  Real *const phi       = dc_;
+  #endif
 
   const Real xMin = myLo_[2];
   const Real yMin = myLo_[1];
@@ -58,59 +62,59 @@ void Potential_Paris_Galactic::Get_Potential(const Real *const density, Real *co
   const Real dy = dr_[1];
   const Real dz = dr_[0];
 
-  const Real md = galaxy.getM_d();
+  const Real md = SIMULATED_FRACTION * galaxy.getM_d();
   const Real rd = galaxy.getR_d();
   const Real zd = galaxy.getZ_d();
 
-  const Real rho0 = md*zd*zd/(4.0*M_PI);
+  const Real rho0 = md * zd * zd / (4.0 * M_PI);
   gpuFor(
-    nk,nj,ni,
-    GPU_LAMBDA(const int k, const int j, const int i) {
-      const int ia = i+ni*(j+nj*k);
+      nk, nj, ni, GPU_LAMBDA(const int k, const int j, const int i) {
+        const int ia = i + ni * (j + nj * k);
 
-      const Real x = xMin+i*dx;
-      const Real y = yMin+j*dy;
-      const Real z = zMin+k*dz;
+        const Real x = xMin + i * dx;
+        const Real y = yMin + j * dy;
+        const Real z = zMin + k * dz;
 
-      const Real r = sqrt(x*x+y*y);
-      const Real a = sqrt(z*z+zd*zd);
-      const Real b = rd+a;
-      const Real c = r*r+b*b;
-      const Real dRho = rho0*(rd*c+3.0*a*b*b)/(a*a*a*pow(c,2.5));
+        const Real r    = sqrt(x * x + y * y);
+        const Real a    = sqrt(z * z + zd * zd);
+        const Real b    = rd + a;
+        const Real c    = r * r + b * b;
+        const Real dRho = rho0 * (rd * c + 3.0 * a * b * b) / (a * a * a * pow(c, 2.5));
 
-      da[ia] = scale*(rho[ia]-dRho);
-    });
+        da[ia] = scale * (rho[ia] - dRho);
+      });
 
-  pp_->solve(minBytes_,da,db);
+  pp_->solve(minBytes_, da, db);
 
-  const Real phi0 = -g*md;
+  const Real phi0 = -g * md;
   gpuFor(
-    nk,nj,ni,
-    GPU_LAMBDA(const int k, const int j, const int i) {
-      const int ia = i+ni*(j+nj*k);
-      const int ib = i+N_GHOST_POTENTIAL+ngi*(j+N_GHOST_POTENTIAL+ngj*(k+N_GHOST_POTENTIAL));
-
-      const Real x = xMin+i*dx;
-      const Real y = yMin+j*dy;
-      const Real z = zMin+k*dz;
-
-      const Real r = sqrt(x*x+y*y);
-      const Real a = sqrt(z*z+zd*zd);
-      const Real b = a+rd;
-      const Real c = sqrt(r*r+b*b);
-      const Real dPhi = phi0/c;
-
-      phi[ib] = db[ia]+dPhi;
-    });
-
-#ifndef GRAVITY_GPU
-  CHECK(cudaMemcpy(potential,dc_,potentialBytes_,cudaMemcpyDeviceToHost));
-#endif
+      nk, nj, ni, GPU_LAMBDA(const int k, const int j, const int i) {
+        const int ia = i + ni * (j + nj * k);
+        const int ib = i + N_GHOST_POTENTIAL + ngi * (j + N_GHOST_POTENTIAL + ngj * (k + N_GHOST_POTENTIAL));
+
+        const Real x = xMin + i * dx;
+        const Real y = yMin + j * dy;
+        const Real z = zMin + k * dz;
+
+        const Real r    = sqrt(x * x + y * y);
+        const Real a    = sqrt(z * z + zd * zd);
+        const Real b    = a + rd;
+        const Real c    = sqrt(r * r + b * b);
+        const Real dPhi = phi0 / c;
+
+        phi[ib] = db[ia] + dPhi;
+      });
+
+  #ifndef GRAVITY_GPU
+  GPU_Error_Check(cudaMemcpy(potential, dc_, potentialBytes_, cudaMemcpyDeviceToHost));
+  #endif
 }
 
-void Potential_Paris_Galactic::Initialize(const Real lx, const Real ly, const Real lz, const Real xMin, const Real yMin, const Real zMin, const int nx, const int ny, const int nz, const int nxReal, const int nyReal, const int nzReal, const Real dx, const Real dy, const Real dz)
+void PotentialParisGalactic::Initialize(const Real lx, const Real ly, const Real lz, const Real xMin, const Real yMin,
+                                        const Real zMin, const int nx, const int ny, const int nz, const int nxReal,
+                                        const int nyReal, const int nzReal, const Real dx, const Real dy, const Real dz)
 {
-  const long nl012 = long(nxReal)*long(nyReal)*long(nzReal);
+  const long nl012 = long(nxReal) * long(nyReal) * long(nzReal);
   assert(nl012 <= INT_MAX);
 
   dn_[0] = nzReal;
@@ -125,53 +129,66 @@ void Potential_Paris_Galactic::Initialize(const Real lx, const Real ly, const Re
   lr_[1] = ly;
   lr_[2] = lx;
 
-  myLo_[0] = zMin+0.5*dr_[0];
-  myLo_[1] = yMin+0.5*dr_[1];
-  myLo_[2] = xMin+0.5*dr_[2];
-  MPI_Allreduce(myLo_,lo_,3,MPI_DOUBLE,MPI_MIN,MPI_COMM_WORLD);
-
-  const Real hi[3] = {lo_[0]+lr_[0]-dr_[0],lo_[1]+lr_[1]-dr_[1],lo_[2]+lr_[1]-dr_[2]};
-  const int n[3] = {nz,ny,nx};
-  const int m[3] = {n[0]/nzReal,n[1]/nyReal,n[2]/nxReal};
-  const int id[3] = {int(round((myLo_[0]-lo_[0])/(dn_[0]*dr_[0]))),int(round((myLo_[1]-lo_[1])/(dn_[1]*dr_[1]))),int(round((myLo_[2]-lo_[2])/(dn_[2]*dr_[2])))};
-  chprintf(" Paris Galactic: [ %g %g %g ]-[ %g %g %g ] n_local[ %d %d %d ] tasks[ %d %d %d ]\n",lo_[2],lo_[1],lo_[0],hi[2],hi[1],hi[0],dn_[2],dn_[1],dn_[0],m[2],m[1],m[0]);
-
-  assert(dn_[0] == n[0]/m[0]);
-  assert(dn_[1] == n[1]/m[1]);
-  assert(dn_[2] == n[2]/m[2]);
-
-  pp_ = new PoissonZero3DBlockedGPU(n,lo_,hi,m,id);
+  myLo_[0] = zMin + 0.5 * dr_[0];
+  myLo_[1] = yMin + 0.5 * dr_[1];
+  myLo_[2] = xMin + 0.5 * dr_[2];
+  MPI_Allreduce(myLo_, lo_, 3, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
+
+  const Real hi[3] = {lo_[0] + lr_[0] - dr_[0], lo_[1] + lr_[1] - dr_[1], lo_[2] + lr_[1] - dr_[2]};
+  const int n[3]   = {nz, ny, nx};
+  const int m[3]   = {n[0] / nzReal, n[1] / nyReal, n[2] / nxReal};
+  const int id[3]  = {int(round((myLo_[0] - lo_[0]) / (dn_[0] * dr_[0]))),
+                      int(round((myLo_[1] - lo_[1]) / (dn_[1] * dr_[1]))),
+                      int(round((myLo_[2] - lo_[2]) / (dn_[2] * dr_[2])))};
+  chprintf(
+      " Paris Galactic: [ %g %g %g ]-[ %g %g %g ] n_local[ %d %d %d ] tasks[ "
+      "%d %d %d ]\n",
+      lo_[2], lo_[1], lo_[0], hi[2], hi[1], hi[0], dn_[2], dn_[1], dn_[0], m[2], m[1], m[0]);
+
+  assert(dn_[0] == n[0] / m[0]);
+  assert(dn_[1] == n[1] / m[1]);
+  assert(dn_[2] == n[2] / m[2]);
+
+  pp_ = new PoissonZero3DBlockedGPU(n, lo_, hi, m, id);
   assert(pp_);
-  minBytes_ = pp_->bytes();
-  densityBytes_ = long(sizeof(Real))*dn_[0]*dn_[1]*dn_[2];
+  minBytes_     = pp_->bytes();
+  densityBytes_ = long(sizeof(Real)) * dn_[0] * dn_[1] * dn_[2];
 
-  CHECK(cudaMalloc(reinterpret_cast<void **>(&da_),std::max(minBytes_,densityBytes_)));
-  CHECK(cudaMalloc(reinterpret_cast<void **>(&db_),std::max(minBytes_,densityBytes_)));
+  GPU_Error_Check(cudaMalloc(reinterpret_cast<void **>(&da_), std::max(minBytes_, densityBytes_)));
+  GPU_Error_Check(cudaMalloc(reinterpret_cast<void **>(&db_), std::max(minBytes_, densityBytes_)));
 
-#ifndef GRAVITY_GPU
-  const long gg = N_GHOST_POTENTIAL+N_GHOST_POTENTIAL;
-  potentialBytes_ = long(sizeof(Real))*(dn_[0]+gg)*(dn_[1]+gg)*(dn_[2]+gg);
-  CHECK(cudaMalloc(reinterpret_cast<void **>(&dc_),potentialBytes_));
-#endif
+  #ifndef GRAVITY_GPU
+  const long gg   = N_GHOST_POTENTIAL + N_GHOST_POTENTIAL;
+  potentialBytes_ = long(sizeof(Real)) * (dn_[0] + gg) * (dn_[1] + gg) * (dn_[2] + gg);
+  GPU_Error_Check(cudaMalloc(reinterpret_cast<void **>(&dc_), potentialBytes_));
+  #endif
 }
 
-void Potential_Paris_Galactic::Reset()
+void PotentialParisGalactic::Reset()
 {
-#ifndef GRAVITY_GPU
-  if (dc_) CHECK(cudaFree(dc_));
-  dc_ = nullptr;
+  #ifndef GRAVITY_GPU
+  if (dc_) {
+    GPU_Error_Check(cudaFree(dc_));
+  }
+  dc_             = nullptr;
   potentialBytes_ = 0;
-#endif
+  #endif
 
-  if (db_) CHECK(cudaFree(db_));
+  if (db_) {
+    GPU_Error_Check(cudaFree(db_));
+  }
   db_ = nullptr;
 
-  if (da_) CHECK(cudaFree(da_));
+  if (da_) {
+    GPU_Error_Check(cudaFree(da_));
+  }
   da_ = nullptr;
 
   densityBytes_ = minBytes_ = 0;
 
-  if (pp_) delete pp_;
+  if (pp_) {
+    delete pp_;
+  }
   pp_ = nullptr;
 
   myLo_[2] = myLo_[1] = myLo_[0] = 0;
diff --git a/src/gravity/potential_paris_galactic.h b/src/gravity/potential_paris_galactic.h
index bb05fa310..95fb1fc1f 100644
--- a/src/gravity/potential_paris_galactic.h
+++ b/src/gravity/potential_paris_galactic.h
@@ -2,29 +2,32 @@
 
 #ifdef PARIS_GALACTIC
 
-#include "paris/PoissonZero3DBlockedGPU.hpp"
-#include "../global/global.h"
-#include "../model/disk_galaxy.h"
+  #include "../global/global.h"
+  #include "../model/disk_galaxy.h"
+  #include "paris/PoissonZero3DBlockedGPU.hpp"
 
-class Potential_Paris_Galactic {
-  public:
-    Potential_Paris_Galactic();
-    ~Potential_Paris_Galactic();
-    void Get_Potential(const Real *density, Real *potential, Real g, const DiskGalaxy &galaxy);
-    void Initialize(Real lx, Real ly, Real lz, Real xMin, Real yMin, Real zMin, int nx, int ny, int nz, int nxReal, int nyReal, int nzReal, Real dx, Real dy, Real dz);
-    void Reset();
-  protected:
-    int dn_[3];
-    Real dr_[3],lo_[3],lr_[3],myLo_[3];
-    PoissonZero3DBlockedGPU *pp_;
-    long densityBytes_;
-    long minBytes_;
-    Real *da_;
-    Real *db_;
-#ifndef GRAVITY_GPU
-    long potentialBytes_;
-    Real *dc_;
-#endif
+class PotentialParisGalactic
+{
+ public:
+  PotentialParisGalactic();
+  ~PotentialParisGalactic();
+  void Get_Potential(const Real *density, Real *potential, Real g, const DiskGalaxy &galaxy);
+  void Initialize(Real lx, Real ly, Real lz, Real xMin, Real yMin, Real zMin, int nx, int ny, int nz, int nxReal,
+                  int nyReal, int nzReal, Real dx, Real dy, Real dz);
+  void Reset();
+
+ protected:
+  int dn_[3];
+  Real dr_[3], lo_[3], lr_[3], myLo_[3];
+  PoissonZero3DBlockedGPU *pp_;
+  long densityBytes_;
+  long minBytes_;
+  Real *da_;
+  Real *db_;
+  #ifndef GRAVITY_GPU
+  long potentialBytes_;
+  Real *dc_;
+  #endif
 };
 
 #endif
diff --git a/src/gravity/static_grav.h b/src/gravity/static_grav.h
index 3ddbb86be..e671555bf 100644
--- a/src/gravity/static_grav.h
+++ b/src/gravity/static_grav.h
@@ -2,161 +2,209 @@
  *  \brief Definitions of functions to calculate gravitational
            acceleration in 1, 2, and 3D. Called in Update_Conserved_Variables
            functions in hydro_cuda.cu. */
-#ifdef CUDA
 
 #pragma once
 
+#include <math.h>  // provides sqrt log cos sin atan etc.
 #include <stdio.h>
-#include <math.h> // provides sqrt log cos sin atan etc.
-#include "../global/global.h" // provides GN etc.
+
+#include "../global/global.h"  // provides GN etc.
 
 // Work around lack of pow(Real,int) in Hip Clang for Rocm 3.5
-static inline __device__ Real pow2(const Real x) { return x*x; }
+static inline __device__ Real pow2(const Real x) { return x * x; }
 
-inline __device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, Real xbound, Real *gx)
+inline __device__ void calc_g_1D(int xid, int x_off, int n_ghost, int custom_grav, Real dx, Real xbound, Real *gx)
 {
   Real x_pos, r_disk, r_halo;
-  x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound;
-
-  // for disk components, calculate polar r
-  //r_disk = 0.220970869121;
-  //r_disk = 6.85009694274;
-  r_disk = 13.9211647546;
-  //r_disk = 20.9922325665;
-  // for halo, calculate spherical r
-  r_halo = sqrt(x_pos*x_pos + r_disk*r_disk);
-
-  // set properties of halo and disk (these must match initial conditions)
-  Real a_disk_z, a_halo, M_vir, M_d, R_vir, R_d, z_d, R_h, M_h, c_vir, phi_0_h, x;
-  M_vir = 1.0e12; // viral mass of MW in M_sun
-  M_d = 6.5e10; // mass of disk in M_sun
-  M_h = M_vir - M_d; // halo mass in M_sun
-  R_vir = 261; // viral radius in kpc
-  c_vir = 20.0; // halo concentration
-  R_h = R_vir / c_vir; // halo scale length in kpc
-  R_d = 3.5; // disk scale length in kpc
-  z_d = 3.5/5.0; // disk scale height in kpc
-  phi_0_h = GN * M_h / (log(1.0+c_vir) - c_vir / (1.0+c_vir));
-  x = r_halo / R_h;
-
-  // calculate acceleration due to NFW halo & Miyamoto-Nagai disk
-  a_halo = - phi_0_h * (log(1+x) - x/(1+x)) / (r_halo*r_halo);
-  a_disk_z = - GN * M_d * x_pos * (R_d + sqrt(x_pos*x_pos + z_d*z_d)) / ( pow(r_disk*r_disk + pow2(R_d + sqrt(x_pos*x_pos + z_d*z_d)), 1.5) * sqrt(x_pos*x_pos + z_d*z_d) );
-
-  // total acceleration is the sum of the halo + disk components
-  *gx = (x_pos/r_halo)*a_halo + a_disk_z;
-
+  x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound;
+  // set gravity field according to parameter file input
+  switch (custom_grav) {
+    case 1:
+      // 1D NFW halo & Miyamoto-Nagai disk
+      //  for disk components, calculate polar r
+      //  r_disk = 0.220970869121;
+      //  r_disk = 6.85009694274;
+      r_disk = 13.9211647546;
+      // r_disk = 20.9922325665;
+      //  for halo, calculate spherical r
+      r_halo = sqrt(x_pos * x_pos + r_disk * r_disk);
+
+      // set properties of halo and disk (these must match initial conditions)
+      Real a_disk_z, a_halo, M_vir, M_d, R_vir, R_d, z_d, R_h, M_h, c_vir, phi_0_h, x;
+      M_vir   = 1.0e12;         // viral mass of MW in M_sun
+      M_d     = 6.5e10;         // mass of disk in M_sun
+      M_h     = M_vir - M_d;    // halo mass in M_sun
+      R_vir   = 261;            // viral radius in kpc
+      c_vir   = 20.0;           // halo concentration
+      R_h     = R_vir / c_vir;  // halo scale length in kpc
+      R_d     = 3.5;            // disk scale length in kpc
+      z_d     = 3.5 / 5.0;      // disk scale height in kpc
+      phi_0_h = GN * M_h / (log(1.0 + c_vir) - c_vir / (1.0 + c_vir));
+      x       = r_halo / R_h;
+
+      // calculate acceleration due to NFW halo & Miyamoto-Nagai disk
+      a_halo = -phi_0_h * (log(1 + x) - x / (1 + x)) / (r_halo * r_halo);
+      a_disk_z =
+          -GN * M_d * x_pos * (R_d + sqrt(x_pos * x_pos + z_d * z_d)) /
+          (pow(r_disk * r_disk + pow2(R_d + sqrt(x_pos * x_pos + z_d * z_d)), 1.5) * sqrt(x_pos * x_pos + z_d * z_d));
+
+      // total acceleration is the sum of the halo + disk components
+      *gx = (x_pos / r_halo) * a_halo + a_disk_z;
+      break;
+    default:
+      *gx = 0;
+  }
   return;
-
 }
 
-
-inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real *gx, Real *gy)
+inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, int custom_grav, Real dx, Real dy,
+                                 Real xbound, Real ybound, Real *gx, Real *gy)
 {
   Real x_pos, y_pos, r, phi;
-  // use the subgrid offset and global boundaries to calculate absolute positions on the grid
-  x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound;
-  y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound;
-
-  // for Gresho, also need r & phi
-  r = sqrt(x_pos*x_pos + y_pos*y_pos);
+  // use the subgrid offset and global boundaries to calculate absolute
+  // positions on the grid
+  x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound;
+  y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound;
+  // for Gresho and disks, also need r & phi
+  r   = sqrt(x_pos * x_pos + y_pos * y_pos);
   phi = atan2(y_pos, x_pos);
-
-/*
-  // set acceleration to balance v_phi in Gresho problem
-  if (r < 0.2) {
-    *gx = -cos(phi)*25.0*r;
-    *gy = -sin(phi)*25.0*r;
-  }
-  else if (r >= 0.2 && r < 0.4) {
-    *gx = -cos(phi)*(4.0 - 20.0*r + 25.0*r*r)/r;
-    *gy = -sin(phi)*(4.0 - 20.0*r + 25.0*r*r)/r;
-  }
-  else {
-    *gx = 0.0;
-    *gy = 0.0;
+  switch (custom_grav) {
+    case 1:
+      // Gresho vortex
+      // set acceleration to balance v_phi in Gresho problem
+      if (r < 0.2) {
+        *gx = -cos(phi) * 25.0 * r;
+        *gy = -sin(phi) * 25.0 * r;
+      } else if (r >= 0.2 && r < 0.4) {
+        *gx = -cos(phi) * (4.0 - 20.0 * r + 25.0 * r * r) / r;
+        *gy = -sin(phi) * (4.0 - 20.0 * r + 25.0 * r * r) / r;
+      } else {
+        *gx = 0.0;
+        *gy = 0.0;
+      }
+      break;
+    case 2:
+      // Rayleigh-Taylor instability
+      *gx = 0;
+      *gy = -1;
+      break;
+    case 3:
+      // 2D disk in keplerian rotation
+      Real M;
+      M   = 1 * MSUN_CGS;
+      *gx = -cos(phi) * GN * M / (r * r);
+      *gy = -sin(phi) * GN * M / (r * r);
+      break;
+    case 4:
+      // set gravitational acceleration for Kuzmin disk + NFW halo
+      Real a_d, a_h, a, M_vir, M_d, R_vir, R_d, R_s, M_h, c_vir, x;
+      M_vir = 1.0e12;         // viral mass of MW in M_sun
+      M_d   = 6.5e10;         // mass of disk in M_sun (assume all gas)
+      M_h   = M_vir - M_d;    // halo mass in M_sun
+      R_vir = 261;            // viral radius in kpc
+      c_vir = 20;             // halo concentration
+      R_s   = R_vir / c_vir;  // halo scale length in kpc
+      R_d   = 3.5;            // disk scale length in kpc
+
+      // calculate acceleration
+      x   = r / R_s;
+      a_d = GN * M_d * r * pow(r * r + R_d * R_d, -1.5);
+      a_h = GN * M_h * (log(1 + x) - x / (1 + x)) / ((log(1 + c_vir) - c_vir / (1 + c_vir)) * r * r);
+      a   = a_d + a_h;
+
+      *gx = -cos(phi) * a;
+      *gy = -sin(phi) * a;
+      break;
+    default:
+      *gx = 0;
+      *gy = 0;
   }
-*/
-/*
-  // set gravitational acceleration for Keplarian potential
-  Real M;
-  M = 1*Msun;
-  *gx = -cos(phi)*GN*M/(r*r);
-  *gy = -sin(phi)*GN*M/(r*r);
-*/
-  // set gravitational acceleration for Kuzmin disk + NFW halo
-  Real a_d, a_h, a, M_vir, M_d, R_vir, R_d, R_s, M_h, c_vir, x;
-  M_vir = 1.0e12; // viral mass of MW in M_sun
-  M_d = 6.5e10; // mass of disk in M_sun (assume all gas)
-  M_h = M_vir - M_d; // halo mass in M_sun
-  R_vir = 261; // viral radius in kpc
-  c_vir = 20; // halo concentration
-  R_s = R_vir / c_vir; // halo scale length in kpc
-  R_d = 3.5; // disk scale length in kpc
-
-  // calculate acceleration
-  x = r / R_s;
-  a_d = GN * M_d * r * pow(r*r + R_d*R_d, -1.5);
-  a_h = GN * M_h * (log(1+x)- x / (1+x)) / ((log(1+c_vir) - c_vir / (1+c_vir)) * r*r);
-  a = a_d + a_h;
-
-  *gx = -cos(phi)*a;
-  *gy = -sin(phi)*a;
 
   return;
 }
 
-
-inline __device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real *gx, Real *gy, Real *gz)
+inline __device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost,
+                                 int custom_grav, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound,
+                                 Real *gx, Real *gy, Real *gz)
 {
   Real x_pos, y_pos, z_pos, r_disk, r_halo;
-  // use the subgrid offset and global boundaries to calculate absolute positions on the grid
-  x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound;
-  y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound;
-  z_pos = (z_off + zid - n_ghost + 0.5)*dz + zbound;
+  // use the subgrid offset and global boundaries to calculate absolute
+  // positions on the grid
+  x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound;
+  y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound;
+  z_pos = (z_off + zid - n_ghost + 0.5) * dz + zbound;
 
   // for disk components, calculate polar r
-  r_disk = sqrt(x_pos*x_pos + y_pos*y_pos);
+  r_disk = sqrt(x_pos * x_pos + y_pos * y_pos);
   // for halo, calculate spherical r
-  r_halo = sqrt(x_pos*x_pos + y_pos*y_pos + z_pos*z_pos);
-
-  // set properties of halo and disk (these must match initial conditions)
+  r_halo = sqrt(x_pos * x_pos + y_pos * y_pos + z_pos * z_pos);
   Real a_disk_r, a_disk_z, a_halo, a_halo_r, a_halo_z;
   Real M_vir, M_d, R_vir, R_d, z_d, R_h, M_h, c_vir, phi_0_h, x;
-  // MW model
-  M_vir = 1.0e12; // viral mass of in M_sun
-  M_d = 6.5e10; // viral mass of in M_sun
-  R_d = 3.5; // disk scale length in kpc
-  z_d = 3.5/5.0; // disk scale height in kpc
-  R_vir = 261.; // virial radius in kpc
-  c_vir = 20.0; // halo concentration
-  // M82 model
-  //M_vir = 5.0e10; // viral mass of in M_sun
-  //M_d = 1.0e10; // mass of disk in M_sun
-  //R_d = 0.8; // disk scale length in kpc
-  //z_d = 0.15; // disk scale height in kpc
-  //R_vir = R_d/0.015; // viral radius in kpc
-  //c_vir = 10.0; // halo concentration
-
-  M_h = M_vir - M_d; // halo mass in M_sun
-  R_h = R_vir / c_vir; // halo scale length in kpc
-  phi_0_h = GN * M_h / (log(1.0+c_vir) - c_vir / (1.0+c_vir));
-  x = r_halo / R_h;
-
-  // calculate acceleration due to NFW halo & Miyamoto-Nagai disk
-  a_halo = - phi_0_h * (log(1+x) - x/(1+x)) / (r_halo*r_halo);
-  a_halo_r = a_halo*(r_disk/r_halo);
-  a_halo_z = a_halo*(z_pos/r_halo);
-  a_disk_r = - GN * M_d * r_disk * pow(r_disk*r_disk+ pow2(R_d + sqrt(z_pos*z_pos + z_d*z_d)), -1.5);
-  a_disk_z = - GN * M_d * z_pos * (R_d + sqrt(z_pos*z_pos + z_d*z_d)) / ( pow(r_disk*r_disk + pow2(R_d + sqrt(z_pos*z_pos + z_d*z_d)), 1.5) * sqrt(z_pos*z_pos + z_d*z_d) );
-
-  // total acceleration is the sum of the halo + disk components
-  *gx = (x_pos/r_disk)*(a_disk_r+a_halo_r);
-  *gy = (y_pos/r_disk)*(a_disk_r+a_halo_r);
-  *gz = a_disk_z+a_halo_z;
-
+  switch (custom_grav) {
+    case 1:
+      // Milky way disk model
+      // set properties of halo and disk (these must match initial conditions)
+
+      M_vir = 1.0e12;     // viral mass of in M_sun
+      M_d   = 6.5e10;     // viral mass of in M_sun
+      R_d   = 3.5;        // disk scale length in kpc
+      z_d   = 3.5 / 5.0;  // disk scale height in kpc
+      R_vir = 261.;       // virial radius in kpc
+      c_vir = 20.0;       // halo concentration
+
+      M_h     = M_vir - M_d;    // halo mass in M_sun
+      R_h     = R_vir / c_vir;  // halo scale length in kpc
+      phi_0_h = GN * M_h / (log(1.0 + c_vir) - c_vir / (1.0 + c_vir));
+      x       = r_halo / R_h;
+
+      // calculate acceleration due to NFW halo & Miyamoto-Nagai disk
+      a_halo   = -phi_0_h * (log(1 + x) - x / (1 + x)) / (r_halo * r_halo);
+      a_halo_r = a_halo * (r_disk / r_halo);
+      a_halo_z = a_halo * (z_pos / r_halo);
+      a_disk_r = -GN * M_d * r_disk * pow(r_disk * r_disk + pow2(R_d + sqrt(z_pos * z_pos + z_d * z_d)), -1.5);
+      a_disk_z =
+          -GN * M_d * z_pos * (R_d + sqrt(z_pos * z_pos + z_d * z_d)) /
+          (pow(r_disk * r_disk + pow2(R_d + sqrt(z_pos * z_pos + z_d * z_d)), 1.5) * sqrt(z_pos * z_pos + z_d * z_d));
+
+      // total acceleration is the sum of the halo + disk components
+      *gx = (x_pos / r_disk) * (a_disk_r + a_halo_r);
+      *gy = (y_pos / r_disk) * (a_disk_r + a_halo_r);
+      *gz = a_disk_z + a_halo_z;
+      break;
+    case 2:
+      // M82 model
+      // set properties of halo and disk (these must match initial conditions)
+
+      M_vir = 5.0e10;       // viral mass of in M_sun
+      M_d   = 1.0e10;       // mass of disk in M_sun
+      R_d   = 0.8;          // disk scale length in kpc
+      z_d   = 0.15;         // disk scale height in kpc
+      R_vir = R_d / 0.015;  // viral radius in kpc
+      c_vir = 10.0;         // halo concentration
+
+      M_h     = M_vir - M_d;    // halo mass in M_sun
+      R_h     = R_vir / c_vir;  // halo scale length in kpc
+      phi_0_h = GN * M_h / (log(1.0 + c_vir) - c_vir / (1.0 + c_vir));
+      x       = r_halo / R_h;
+
+      // calculate acceleration due to NFW halo & Miyamoto-Nagai disk
+      a_halo   = -phi_0_h * (log(1 + x) - x / (1 + x)) / (r_halo * r_halo);
+      a_halo_r = a_halo * (r_disk / r_halo);
+      a_halo_z = a_halo * (z_pos / r_halo);
+      a_disk_r = -GN * M_d * r_disk * pow(r_disk * r_disk + pow2(R_d + sqrt(z_pos * z_pos + z_d * z_d)), -1.5);
+      a_disk_z =
+          -GN * M_d * z_pos * (R_d + sqrt(z_pos * z_pos + z_d * z_d)) /
+          (pow(r_disk * r_disk + pow2(R_d + sqrt(z_pos * z_pos + z_d * z_d)), 1.5) * sqrt(z_pos * z_pos + z_d * z_d));
+
+      // total acceleration is the sum of the halo + disk components
+      *gx = (x_pos / r_disk) * (a_disk_r + a_halo_r);
+      *gy = (y_pos / r_disk) * (a_disk_r + a_halo_r);
+      *gz = a_disk_z + a_halo_z;
+      break;
+    default:
+      *gx = 0;
+      *gy = 0;
+      *gz = 0;
+  }
   return;
 }
-
-#endif //CUDA
-
diff --git a/src/grid/boundary_conditions.cpp b/src/grid/boundary_conditions.cpp
index 341360eba..d9201fe8b 100644
--- a/src/grid/boundary_conditions.cpp
+++ b/src/grid/boundary_conditions.cpp
@@ -2,122 +2,124 @@
  *  \brief Definitions of the boundary conditions for various tests.
            Functions are members of the Grid3D class. */
 
-#include <stdlib.h>
-#include <stdio.h>
 #include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
+
+#include "../grid/cuda_boundaries.h"  // provides SetGhostCells
 #include "../grid/grid3D.h"
 #include "../io/io.h"
-#include "../utils/error_handling.h"
 #include "../mpi/mpi_routines.h"
+#include "../utils/error_handling.h"
 
-#include "../grid/cuda_boundaries.h" // provides SetGhostCells
-
-
-/*! \fn void Set_Boundary_Conditions_Grid(parameters P)
- *  \brief Set the boundary conditions for all components based on info in the parameters structure. */
-void Grid3D::Set_Boundary_Conditions_Grid( parameters P){
-
-  #ifndef ONLY_PARTICLES
+/*! \fn void Set_Boundary_Conditions_Grid(Parameters P )
+ *  \brief Set the boundary conditions for all components based on info in the
+ * parameters structure. */
+void Grid3D::Set_Boundary_Conditions_Grid(Parameters P)
+{
+#ifndef ONLY_PARTICLES
   // Dont transfer Hydro boundaries when only doing particles
 
   // Transfer Hydro Conserved boundaries
   #ifdef CPU_TIME
   Timer.Boundaries.Start();
-  #endif //CPU_TIME
+  #endif  // CPU_TIME
   H.TRANSFER_HYDRO_BOUNDARIES = true;
   Set_Boundary_Conditions(P);
   H.TRANSFER_HYDRO_BOUNDARIES = false;
   #ifdef CPU_TIME
   Timer.Boundaries.End();
-  #endif //CPU_TIME
-  #endif //ONLY_PARTICLES
+  #endif  // CPU_TIME
+#endif    // ONLY_PARTICLES
 
-  // If the Gravity coupling is on the CPU, the potential is not in the Conserved arrays,
-  // and its boundaries need to be transferred separately
-  #ifdef GRAVITY
+// If the Gravity coupling is on the CPU, the potential is not in the Conserved
+// arrays, and its boundaries need to be transferred separately
+#ifdef GRAVITY
   #ifdef CPU_TIME
   Timer.Pot_Boundaries.Start();
-  #endif
+  #endif  // CPU_TIME
   Grav.TRANSFER_POTENTIAL_BOUNDARIES = true;
   Set_Boundary_Conditions(P);
   Grav.TRANSFER_POTENTIAL_BOUNDARIES = false;
   #ifdef CPU_TIME
   Timer.Pot_Boundaries.End();
-  #endif
-  #endif
+  #endif  // CPU_TIME
+#endif    // GRAVITY
 }
 
-/*! \fn void Set_Boundary_Conditions(parameters P)
- *  \brief Set the boundary conditions based on info in the parameters structure. */
-void Grid3D::Set_Boundary_Conditions(parameters P) {
-
-  //Check Only one boundary type id being transferred
+/*! \fn void Set_Boundary_Conditions(Parameters P )
+ *  \brief Set the boundary conditions based on info in the parameters
+ * structure. */
+void Grid3D::Set_Boundary_Conditions(Parameters P)
+{
+  // Check Only one boundary type id being transferred
   int n_bounds = 0;
-  n_bounds += (int) H.TRANSFER_HYDRO_BOUNDARIES;
-  #ifdef GRAVITY
-  n_bounds += (int) Grav.TRANSFER_POTENTIAL_BOUNDARIES;
+  n_bounds += (int)H.TRANSFER_HYDRO_BOUNDARIES;
+#ifdef GRAVITY
+  n_bounds += (int)Grav.TRANSFER_POTENTIAL_BOUNDARIES;
+  #ifdef SOR
+  n_bounds += (int)Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES;
+  #endif  // SOR
+#endif    // GRAVITY
+#ifdef PARTICLES
+  n_bounds += (int)Particles.TRANSFER_PARTICLES_BOUNDARIES;
+  n_bounds += (int)Particles.TRANSFER_DENSITY_BOUNDARIES;
+#endif  // PARTICLES
+
+  if (n_bounds > 1) {
+    printf(
+        "ERROR: More than one boundary type for transfer. N boundary types: "
+        "%d\n",
+        n_bounds);
+    printf(" Boundary Hydro: %d\n", (int)H.TRANSFER_HYDRO_BOUNDARIES);
+#ifdef GRAVITY
+    printf(" Boundary Potential: %d\n", (int)Grav.TRANSFER_POTENTIAL_BOUNDARIES);
   #ifdef SOR
-  n_bounds += (int) Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES;
-  #endif //SOR
-  #endif //GRAVITY
-  #ifdef PARTICLES
-  n_bounds += (int) Particles.TRANSFER_PARTICLES_BOUNDARIES;
-  n_bounds += (int) Particles.TRANSFER_DENSITY_BOUNDARIES;
-  #endif  //PARTICLES
-
-  if ( n_bounds > 1 ){
-    printf("ERROR: More than one boundary type for transfer. N boundary types: %d\n", n_bounds );
-    printf(" Boundary Hydro: %d\n", (int) H.TRANSFER_HYDRO_BOUNDARIES );
-    #ifdef GRAVITY
-    printf(" Boundary Potential: %d\n", (int) Grav.TRANSFER_POTENTIAL_BOUNDARIES );
-    #ifdef SOR
-    printf(" Boundary Poisson: %d\n", (int) Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES );
-    #endif //SOR
-    #endif //GRAVITY
-    #ifdef PARTICLES
-    printf(" Boundary Particles: %d\n", (int) Particles.TRANSFER_PARTICLES_BOUNDARIES );
-    printf(" Boundary Particles Density: %d\n", (int) Particles.TRANSFER_DENSITY_BOUNDARIES );
-    #endif //PARTICLES
+    printf(" Boundary Poisson: %d\n", (int)Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES);
+  #endif  // SOR
+#endif    // GRAVITY
+#ifdef PARTICLES
+    printf(" Boundary Particles: %d\n", (int)Particles.TRANSFER_PARTICLES_BOUNDARIES);
+    printf(" Boundary Particles Density: %d\n", (int)Particles.TRANSFER_DENSITY_BOUNDARIES);
+#endif  // PARTICLES
     exit(-1);
   }
 
   // If no boundaries are set to be transferred then exit;
-  if ( n_bounds == 0 ){
-     printf( " Warning: No boundary type for transfer \n");
-     return;
+  if (n_bounds == 0) {
+    printf(" Warning: No boundary type for transfer \n");
+    return;
   }
 
-
 #ifndef MPI_CHOLLA
 
-  int flags[6] = {0,0,0,0,0,0};
+  int flags[6] = {0, 0, 0, 0, 0, 0};
 
   // Check for custom boundary conditions and set boundary flags
-  if(Check_Custom_Boundary(&flags[0], P))
-  {
+  if (Check_Custom_Boundary(&flags[0], P)) {
     Custom_Boundary(P.custom_bcnd);
   }
 
   // set regular boundaries
-  if(H.nx>1) {
+  if (H.nx > 1) {
     Set_Boundaries(0, flags);
     Set_Boundaries(1, flags);
   }
-  if(H.ny>1) {
+  if (H.ny > 1) {
     Set_Boundaries(2, flags);
     Set_Boundaries(3, flags);
   }
-  if(H.nz>1) {
+  if (H.nz > 1) {
     Set_Boundaries(4, flags);
     Set_Boundaries(5, flags);
   }
 
   #ifdef GRAVITY
-  Grav.Set_Boundary_Flags( flags );
-  #endif  //Gravity
+  Grav.Set_Boundary_Flags(flags);
+  #endif  // Gravity
 
-#else  /*MPI_CHOLLA*/
+#else /*MPI_CHOLLA*/
 
   /*Set boundaries, including MPI exchanges*/
 
@@ -126,40 +128,29 @@ void Grid3D::Set_Boundary_Conditions(parameters P) {
 #endif /*MPI_CHOLLA*/
 }
 
-
-/*! \fn int Check_Custom_Boundary(int *flags, struct parameters P)
+/*! \fn int Check_Custom_Boundary(int *flags, struct Parameters P)
  *  \brief Check for custom boundary conditions and set boundary flags. */
-int Grid3D::Check_Custom_Boundary(int *flags, struct parameters P)
+int Grid3D::Check_Custom_Boundary(int *flags, struct Parameters P)
 {
-
   /*check if any boundary is a custom boundary*/
   /*if yes, then return 1*/
   /*if no, then return 0*/
   /*additionally, set a flag for each boundary*/
 
-  if(H.nx>1)
-  {
-    *(flags+0) = P.xl_bcnd;
-    *(flags+1) = P.xu_bcnd;
+  if (H.nx > 1) {
+    *(flags + 0) = P.xl_bcnd;
+    *(flags + 1) = P.xu_bcnd;
   }
-  if(H.ny>1)
-  {
-    *(flags+2) = P.yl_bcnd;
-    *(flags+3) = P.yu_bcnd;
+  if (H.ny > 1) {
+    *(flags + 2) = P.yl_bcnd;
+    *(flags + 3) = P.yu_bcnd;
   }
-  if(H.nz>1)
-  {
-    *(flags+4) = P.zl_bcnd;
-    *(flags+5) = P.zu_bcnd;
+  if (H.nz > 1) {
+    *(flags + 4) = P.zl_bcnd;
+    *(flags + 5) = P.zu_bcnd;
   }
 
-  for (int i=0; i<6; i++)
-  {
-    if (!( (flags[i]>=0)&&(flags[i]<=5) ) )
-    {
-      chprintf("Invalid boundary conditions. Must select between 1 (periodic), 2 (reflective), 3 (transmissive), 4 (custom), 5 (mpi).\n");
-      chexit(-1);
-    }
+  for (int i = 0; i < 6; i++) {
     if (flags[i] == 4) {
       /*custom boundaries*/
       return 1;
@@ -169,154 +160,258 @@ int Grid3D::Check_Custom_Boundary(int *flags, struct parameters P)
   return 0;
 }
 
-
-
 /*! \fn void Set_Boundaries(int dir, int flags[])
  *  \brief Apply boundary conditions to the grid. */
 void Grid3D::Set_Boundaries(int dir, int flags[])
 {
   int i, j, k;
-  int imin[3] = {0,0,0};
-  int imax[3] = {H.nx,H.ny,H.nz};
-  Real a[3]   = {1,1,1};  //sign of momenta
-  int idx;    //index of a real cell
-  int gidx;   //index of a ghost cell
+  int imin[3] = {0, 0, 0};
+  int imax[3] = {H.nx, H.ny, H.nz};
+  Real a[3]   = {1, 1, 1};  // sign of momenta
+  int idx;                  // index of a real cell
+  int gidx;                 // index of a ghost cell
 
   int nPB, nBoundaries;
   int *iaBoundary, *iaCell;
 
   /*if the cell face is an custom boundary, exit */
-  if(flags[dir]==4)
+  if (flags[dir] == 4) {
     return;
+  }
 
-#ifdef   MPI_CHOLLA
+#ifdef MPI_CHOLLA
   /*if the cell face is an mpi boundary, exit */
-  if(flags[dir]==5)
+  if (flags[dir] == 5) {
     return;
+  }
 #endif /*MPI_CHOLLA*/
 
-
-
-  #ifdef GRAVITY
-
-  if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){
-    if ( flags[dir] == 1 ){
-      // Set Periodic Boundaries for the ghost cells.
-      #ifdef GRAVITY_GPU
-      if ( dir == 0 ) Set_Potential_Boundaries_Periodic_GPU( 0, 0, flags );
-      if ( dir == 1 ) Set_Potential_Boundaries_Periodic_GPU( 0, 1, flags );
-      if ( dir == 2 ) Set_Potential_Boundaries_Periodic_GPU( 1, 0, flags );
-      if ( dir == 3 ) Set_Potential_Boundaries_Periodic_GPU( 1, 1, flags );
-      if ( dir == 4 ) Set_Potential_Boundaries_Periodic_GPU( 2, 0, flags );
-      if ( dir == 5 ) Set_Potential_Boundaries_Periodic_GPU( 2, 1, flags );
-      #else
-      if ( dir == 0 ) Set_Potential_Boundaries_Periodic( 0, 0, flags );
-      if ( dir == 1 ) Set_Potential_Boundaries_Periodic( 0, 1, flags );
-      if ( dir == 2 ) Set_Potential_Boundaries_Periodic( 1, 0, flags );
-      if ( dir == 3 ) Set_Potential_Boundaries_Periodic( 1, 1, flags );
-      if ( dir == 4 ) Set_Potential_Boundaries_Periodic( 2, 0, flags );
-      if ( dir == 5 ) Set_Potential_Boundaries_Periodic( 2, 1, flags );
-      #endif
+#ifdef GRAVITY
+  if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) {
+    if (flags[dir] == 1) {
+  // Set Periodic Boundaries for the ghost cells.
+  #ifdef GRAVITY_GPU
+      if (dir == 0) {
+        Set_Potential_Boundaries_Periodic_GPU(0, 0, flags);
+      }
+      if (dir == 1) {
+        Set_Potential_Boundaries_Periodic_GPU(0, 1, flags);
+      }
+      if (dir == 2) {
+        Set_Potential_Boundaries_Periodic_GPU(1, 0, flags);
+      }
+      if (dir == 3) {
+        Set_Potential_Boundaries_Periodic_GPU(1, 1, flags);
+      }
+      if (dir == 4) {
+        Set_Potential_Boundaries_Periodic_GPU(2, 0, flags);
+      }
+      if (dir == 5) {
+        Set_Potential_Boundaries_Periodic_GPU(2, 1, flags);
+      }
+  #else
+      if (dir == 0) {
+        Set_Potential_Boundaries_Periodic(0, 0, flags);
+      }
+      if (dir == 1) {
+        Set_Potential_Boundaries_Periodic(0, 1, flags);
+      }
+      if (dir == 2) {
+        Set_Potential_Boundaries_Periodic(1, 0, flags);
+      }
+      if (dir == 3) {
+        Set_Potential_Boundaries_Periodic(1, 1, flags);
+      }
+      if (dir == 4) {
+        Set_Potential_Boundaries_Periodic(2, 0, flags);
+      }
+      if (dir == 5) {
+        Set_Potential_Boundaries_Periodic(2, 1, flags);
+      }
+  #endif
     }
-    if ( flags[dir] == 3 ){
-
-      #ifdef GRAVITY_GPU
-      if ( dir == 0 ) Set_Potential_Boundaries_Isolated_GPU( 0, 0, flags );
-      if ( dir == 1 ) Set_Potential_Boundaries_Isolated_GPU( 0, 1, flags );
-      if ( dir == 2 ) Set_Potential_Boundaries_Isolated_GPU( 1, 0, flags );
-      if ( dir == 3 ) Set_Potential_Boundaries_Isolated_GPU( 1, 1, flags );
-      if ( dir == 4 ) Set_Potential_Boundaries_Isolated_GPU( 2, 0, flags );
-      if ( dir == 5 ) Set_Potential_Boundaries_Isolated_GPU( 2, 1, flags );
-      #else
-      if ( dir == 0 ) Set_Potential_Boundaries_Isolated( 0, 0, flags );
-      if ( dir == 1 ) Set_Potential_Boundaries_Isolated( 0, 1, flags );
-      if ( dir == 2 ) Set_Potential_Boundaries_Isolated( 1, 0, flags );
-      if ( dir == 3 ) Set_Potential_Boundaries_Isolated( 1, 1, flags );
-      if ( dir == 4 ) Set_Potential_Boundaries_Isolated( 2, 0, flags );
-      if ( dir == 5 ) Set_Potential_Boundaries_Isolated( 2, 1, flags );
-      #endif//GRAVITY_GPU
+    if (flags[dir] == 3) {
+  #ifdef GRAVITY_GPU
+      if (dir == 0) {
+        Set_Potential_Boundaries_Isolated_GPU(0, 0, flags);
+      }
+      if (dir == 1) {
+        Set_Potential_Boundaries_Isolated_GPU(0, 1, flags);
+      }
+      if (dir == 2) {
+        Set_Potential_Boundaries_Isolated_GPU(1, 0, flags);
+      }
+      if (dir == 3) {
+        Set_Potential_Boundaries_Isolated_GPU(1, 1, flags);
+      }
+      if (dir == 4) {
+        Set_Potential_Boundaries_Isolated_GPU(2, 0, flags);
+      }
+      if (dir == 5) {
+        Set_Potential_Boundaries_Isolated_GPU(2, 1, flags);
+      }
+  #else
+      if (dir == 0) {
+        Set_Potential_Boundaries_Isolated(0, 0, flags);
+      }
+      if (dir == 1) {
+        Set_Potential_Boundaries_Isolated(0, 1, flags);
+      }
+      if (dir == 2) {
+        Set_Potential_Boundaries_Isolated(1, 0, flags);
+      }
+      if (dir == 3) {
+        Set_Potential_Boundaries_Isolated(1, 1, flags);
+      }
+      if (dir == 4) {
+        Set_Potential_Boundaries_Isolated(2, 0, flags);
+      }
+      if (dir == 5) {
+        Set_Potential_Boundaries_Isolated(2, 1, flags);
+      }
+  #endif  // GRAVITY_GPU
     }
     return;
   }
   #ifdef SOR
-  if ( Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES ){
-    if ( flags[dir] ==1 ){
-      if ( dir == 0 ) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic( 0, 0 );
-      if ( dir == 1 ) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic( 0, 1 );
-      if ( dir == 2 ) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic( 1, 0 );
-      if ( dir == 3 ) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic( 1, 1 );
-      if ( dir == 4 ) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic( 2, 0 );
-      if ( dir == 5 ) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic( 2, 1 );
+  if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES) {
+    if (flags[dir] == 1) {
+      if (dir == 0) {
+        Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(0, 0);
+      }
+      if (dir == 1) {
+        Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(0, 1);
+      }
+      if (dir == 2) {
+        Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(1, 0);
+      }
+      if (dir == 3) {
+        Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(1, 1);
+      }
+      if (dir == 4) {
+        Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(2, 0);
+      }
+      if (dir == 5) {
+        Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(2, 1);
+      }
     }
     return;
   }
-  #endif //SOR
-  #endif //GRAVITY
-
-  #ifdef PARTICLES
-  if ( Particles.TRANSFER_DENSITY_BOUNDARIES ){
-    if ( flags[dir] ==1 ){
-      // Set Periodic Boundaries for the particles density.
-      #ifdef PARTICLES_GPU
-      if ( dir == 0 ) Set_Particles_Density_Boundaries_Periodic_GPU( 0, 0 );
-      if ( dir == 1 ) Set_Particles_Density_Boundaries_Periodic_GPU( 0, 1 );
-      if ( dir == 2 ) Set_Particles_Density_Boundaries_Periodic_GPU( 1, 0 );
-      if ( dir == 3 ) Set_Particles_Density_Boundaries_Periodic_GPU( 1, 1 );
-      if ( dir == 4 ) Set_Particles_Density_Boundaries_Periodic_GPU( 2, 0 );
-      if ( dir == 5 ) Set_Particles_Density_Boundaries_Periodic_GPU( 2, 1 );
-      #endif
-      #ifdef PARTICLES_CPU
-      if ( dir == 0 ) Set_Particles_Density_Boundaries_Periodic( 0, 0 );
-      if ( dir == 1 ) Set_Particles_Density_Boundaries_Periodic( 0, 1 );
-      if ( dir == 2 ) Set_Particles_Density_Boundaries_Periodic( 1, 0 );
-      if ( dir == 3 ) Set_Particles_Density_Boundaries_Periodic( 1, 1 );
-      if ( dir == 4 ) Set_Particles_Density_Boundaries_Periodic( 2, 0 );
-      if ( dir == 5 ) Set_Particles_Density_Boundaries_Periodic( 2, 1 );
-      #endif
+  #endif  // SOR
+#endif    // GRAVITY
+
+#ifdef PARTICLES
+  if (Particles.TRANSFER_DENSITY_BOUNDARIES) {
+    if (flags[dir] == 1) {
+  // Set Periodic Boundaries for the particles density.
+  #ifdef PARTICLES_GPU
+      if (dir == 0) {
+        Set_Particles_Density_Boundaries_Periodic_GPU(0, 0);
+      }
+      if (dir == 1) {
+        Set_Particles_Density_Boundaries_Periodic_GPU(0, 1);
+      }
+      if (dir == 2) {
+        Set_Particles_Density_Boundaries_Periodic_GPU(1, 0);
+      }
+      if (dir == 3) {
+        Set_Particles_Density_Boundaries_Periodic_GPU(1, 1);
+      }
+      if (dir == 4) {
+        Set_Particles_Density_Boundaries_Periodic_GPU(2, 0);
+      }
+      if (dir == 5) {
+        Set_Particles_Density_Boundaries_Periodic_GPU(2, 1);
+      }
+  #endif
+  #ifdef PARTICLES_CPU
+      if (dir == 0) {
+        Set_Particles_Density_Boundaries_Periodic(0, 0);
+      }
+      if (dir == 1) {
+        Set_Particles_Density_Boundaries_Periodic(0, 1);
+      }
+      if (dir == 2) {
+        Set_Particles_Density_Boundaries_Periodic(1, 0);
+      }
+      if (dir == 3) {
+        Set_Particles_Density_Boundaries_Periodic(1, 1);
+      }
+      if (dir == 4) {
+        Set_Particles_Density_Boundaries_Periodic(2, 0);
+      }
+      if (dir == 5) {
+        Set_Particles_Density_Boundaries_Periodic(2, 1);
+      }
+  #endif
     }
     return;
   }
-  #endif  //PARTICLES
-
-  #ifdef PARTICLES
-  if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){
-    if ( flags[dir] ==1 ){
-      #ifdef PARTICLES_CPU
-      if ( dir == 0 ) Set_Particles_Boundary( 0, 0 );
-      if ( dir == 1 ) Set_Particles_Boundary( 0, 1 );
-      if ( dir == 2 ) Set_Particles_Boundary( 1, 0 );
-      if ( dir == 3 ) Set_Particles_Boundary( 1, 1 );
-      if ( dir == 4 ) Set_Particles_Boundary( 2, 0 );
-      if ( dir == 5 ) Set_Particles_Boundary( 2, 1 );
-      #endif//PARTICLES_CPU
-
-      #ifdef PARTICLES_GPU
-      if ( dir == 0 ) Set_Particles_Boundary_GPU( 0, 0 );
-      if ( dir == 1 ) Set_Particles_Boundary_GPU( 0, 1 );
-      if ( dir == 2 ) Set_Particles_Boundary_GPU( 1, 0 );
-      if ( dir == 3 ) Set_Particles_Boundary_GPU( 1, 1 );
-      if ( dir == 4 ) Set_Particles_Boundary_GPU( 2, 0 );
-      if ( dir == 5 ) Set_Particles_Boundary_GPU( 2, 1 );
-      #endif//PARTICLES_GPU
-
-
-      } else if (flags[dir] == 3) {
-        #ifdef PARTICLES_CPU
-        Set_Particles_Open_Boundary(dir/2, dir%2);
-        #endif  //PARTICLES_CPU
+#endif  // PARTICLES
+
+#ifdef PARTICLES
+  if (Particles.TRANSFER_PARTICLES_BOUNDARIES) {
+    if (flags[dir] == 1) {
+  #ifdef PARTICLES_CPU
+      if (dir == 0) {
+        Set_Particles_Boundary(0, 0);
+      }
+      if (dir == 1) {
+        Set_Particles_Boundary(0, 1);
+      }
+      if (dir == 2) {
+        Set_Particles_Boundary(1, 0);
+      }
+      if (dir == 3) {
+        Set_Particles_Boundary(1, 1);
+      }
+      if (dir == 4) {
+        Set_Particles_Boundary(2, 0);
+      }
+      if (dir == 5) {
+        Set_Particles_Boundary(2, 1);
+      }
+  #endif  // PARTICLES_CPU
+
+  #ifdef PARTICLES_GPU
+      if (dir == 0) {
+        Set_Particles_Boundary_GPU(0, 0);
+      }
+      if (dir == 1) {
+        Set_Particles_Boundary_GPU(0, 1);
+      }
+      if (dir == 2) {
+        Set_Particles_Boundary_GPU(1, 0);
+      }
+      if (dir == 3) {
+        Set_Particles_Boundary_GPU(1, 1);
+      }
+      if (dir == 4) {
+        Set_Particles_Boundary_GPU(2, 0);
+      }
+      if (dir == 5) {
+        Set_Particles_Boundary_GPU(2, 1);
+      }
+  #endif  // PARTICLES_GPU
+
+    } else if (flags[dir] == 3) {
+  #ifdef PARTICLES_CPU
+      Set_Particles_Open_Boundary_CPU(dir / 2, dir % 2);
+  #endif
+  #ifdef PARTICLES_GPU
+      Particles.Set_Particles_Open_Boundary_GPU(dir / 2, dir % 2);
+  #endif
     }
     return;
   }
-  #endif//PARTICLES
+#endif  // PARTICLES
 
-  //get the extents of the ghost region we are initializing
+  // get the extents of the ghost region we are initializing
   Set_Boundary_Extents(dir, &imin[0], &imax[0]);
 
   // from grid/cuda_boundaries.cu
-  SetGhostCells(C.device,
-		 H.nx, H.ny, H.nz, H.n_fields, H.n_cells, H.n_ghost, flags,
-		 imax[0]-imin[0], imax[1]-imin[1], imax[2]-imin[2],
-		 imin[0], imin[1], imin[2], dir);
+  SetGhostCells(C.device, H.nx, H.ny, H.nz, H.n_fields, H.n_cells, H.n_ghost, flags, imax[0] - imin[0],
+                imax[1] - imin[1], imax[2] - imin[2], imin[0], imin[1], imin[2], dir);
 }
 
 /*! \fn Set_Boundary_Extents(int dir, int *imin, int *imax)
@@ -330,108 +425,118 @@ void Grid3D::Set_Boundary_Extents(int dir, int *imin, int *imax)
   ku = H.nz;
   if (H.ny > 1) {
     jl = H.n_ghost;
-    ju = H.ny-H.n_ghost;
+    ju = H.ny - H.n_ghost;
   }
   if (H.nz > 1) {
     kl = H.n_ghost;
-    ku = H.nz-H.n_ghost;
+    ku = H.nz - H.n_ghost;
   }
 
   il = 0;
   iu = H.n_ghost;
   /*lower x face*/
-  if(dir==0)
-  {
-    *(imin) = il;
-    *(imax) = iu;
-    *(imin+1) = jl;
-    *(imax+1) = ju;
-    *(imin+2) = kl;
-    *(imax+2) = ku;
+  if (dir == 0) {
+    *(imin)     = il;
+    *(imax)     = iu;
+    *(imin + 1) = jl;
+    *(imax + 1) = ju;
+    *(imin + 2) = kl;
+    *(imax + 2) = ku;
   }
-  il = H.nx-H.n_ghost;
+  il = H.nx - H.n_ghost;
   iu = H.nx;
   /*upper x face*/
-  if(dir==1)
-  {
-    *(imin) = il;
-    *(imax) = iu;
-    *(imin+1) = jl;
-    *(imax+1) = ju;
-    *(imin+2) = kl;
-    *(imax+2) = ku;
+  if (dir == 1) {
+    *(imin)     = il;
+    *(imax)     = iu;
+    *(imin + 1) = jl;
+    *(imax + 1) = ju;
+    *(imin + 2) = kl;
+    *(imax + 2) = ku;
   }
   il = 0;
   iu = H.nx;
   jl = 0;
   ju = H.n_ghost;
   /*lower y face*/
-  if(dir==2)
-  {
-    *(imin) = il;
-    *(imax) = iu;
-    *(imin+1) = jl;
-    *(imax+1) = ju;
-    *(imin+2) = kl;
-    *(imax+2) = ku;
+  if (dir == 2) {
+    *(imin)     = il;
+    *(imax)     = iu;
+    *(imin + 1) = jl;
+    *(imax + 1) = ju;
+    *(imin + 2) = kl;
+    *(imax + 2) = ku;
   }
-  jl = H.ny-H.n_ghost;
+  jl = H.ny - H.n_ghost;
   ju = H.ny;
   /*upper y face*/
-  if(dir==3)
-  {
-    *(imin) = il;
-    *(imax) = iu;
-    *(imin+1) = jl;
-    *(imax+1) = ju;
-    *(imin+2) = kl;
-    *(imax+2) = ku;
+  if (dir == 3) {
+    *(imin)     = il;
+    *(imax)     = iu;
+    *(imin + 1) = jl;
+    *(imax + 1) = ju;
+    *(imin + 2) = kl;
+    *(imax + 2) = ku;
   }
   jl = 0;
   ju = H.ny;
   kl = 0;
   ku = H.n_ghost;
   /*lower z face*/
-  if(dir==4)
-  {
-    *(imin) = il;
-    *(imax) = iu;
-    *(imin+1) = jl;
-    *(imax+1) = ju;
-    *(imin+2) = kl;
-    *(imax+2) = ku;
+  if (dir == 4) {
+    *(imin)     = il;
+    *(imax)     = iu;
+    *(imin + 1) = jl;
+    *(imax + 1) = ju;
+    *(imin + 2) = kl;
+    *(imax + 2) = ku;
   }
-  kl = H.nz-H.n_ghost;
+  kl = H.nz - H.n_ghost;
   ku = H.nz;
   /*upper z face*/
-  if(dir==5)
-  {
-    *(imin) = il;
-    *(imax) = iu;
-    *(imin+1) = jl;
-    *(imax+1) = ju;
-    *(imin+2) = kl;
-    *(imax+2) = ku;
+  if (dir == 5) {
+    *(imin)     = il;
+    *(imax)     = iu;
+    *(imin + 1) = jl;
+    *(imax + 1) = ju;
+    *(imin + 2) = kl;
+    *(imax + 2) = ku;
   }
 }
 
-
-
 /*! \fn void Custom_Boundary(char bcnd[MAXLEN])
  *  \brief Select appropriate custom boundary function. */
 void Grid3D::Custom_Boundary(char bcnd[MAXLEN])
 {
-  if (strcmp(bcnd, "noh")==0) {
+  if (strcmp(bcnd, "noh") == 0) {
     // from grid/cuda_boundaries.cu
     Noh_Boundary();
-  }
-  else {
+  } else if (strcmp(bcnd, "wind") == 0) {
+    // from grid/cuda_boundaries.cu
+    Wind_Boundary();
+  } else {
     printf("ABORT: %s -> Unknown custom boundary condition.\n", bcnd);
     exit(0);
   }
 }
 
+/*! \fn void Wind_Boundary()
+ *  \brief Apply wind boundary */
+void Grid3D::Wind_Boundary()
+{
+  int x_off, y_off, z_off;
+  // set x, y, & z offsets of local CPU volume to pass to GPU
+  // so global position on the grid is known
+  x_off = y_off = z_off = 0;
+#ifdef MPI_CHOLLA
+  x_off = nx_local_start;
+  y_off = ny_local_start;
+  z_off = nz_local_start;
+#endif
 
+  Wind_Boundary_CUDA(C.device, H.nx, H.ny, H.nz, H.n_cells, H.n_ghost, x_off, y_off, z_off, H.dx, H.dy, H.dz, H.xbound,
+                     H.ybound, H.zbound, gama, H.t);
+}
 
 /*! \fn void Noh_Boundary()
  *  \brief Apply analytic boundary conditions to +x, +y (and +z) faces,
@@ -445,106 +550,91 @@ void Grid3D::Noh_Boundary()
   // set x, y, & z offsets of local CPU volume to pass to GPU
   // so global position on the grid is known
   x_off = y_off = z_off = 0;
-  #ifdef MPI_CHOLLA
+#ifdef MPI_CHOLLA
   x_off = nx_local_start;
   y_off = ny_local_start;
   z_off = nz_local_start;
-  #endif
-
-  Noh_Boundary_CUDA(C.device, H.nx, H.ny, H.nz, H.n_cells, H.n_ghost,
-                    x_off, y_off, z_off, H.dx, H.dy, H.dz, 
-                    H.xbound, H.ybound, H.zbound, gama, H.t);
-
-/*
-  int i, j, k, id;
-  Real x_pos, y_pos, z_pos, r;
-  Real vx, vy, vz, d_0, P_0, P;
-
-  d_0 = 1.0;
-  P_0 = 1.0e-6;
-
-  // set exact boundaries on the +x face
-  for (k=0; k<H.nz; k++) {
-    for (j=0; j<H.ny; j++) {
-      for (i=H.nx-H.n_ghost; i<H.nx; i++) {
-
-        id = i + j*H.nx + k*H.nx*H.ny;
-        // get the (centered) x, y, and z positions at (x,y,z)
-        Get_Position(i, j, k, &x_pos, &y_pos, &z_pos);
-        if (H.nz > 1) r = sqrt(x_pos*x_pos + y_pos*y_pos+ z_pos*z_pos);
-        else r = sqrt(x_pos*x_pos + y_pos*y_pos);
-        // set the velocities
-        vx = -x_pos / r;
-        vy = -y_pos / r;
-        if (H.nz > 1) vz = -z_pos / r;
-        else vz = 0;
-        // set the conserved quantities
-        if (H.nz > 1) C.density[id] = d_0*(1.0 + H.t/r)*(1.0 + H.t/r);
-        else C.density[id]    = d_0*(1.0 + H.t/r);
-        C.momentum_x[id] = vx*C.density[id];
-        C.momentum_y[id] = vy*C.density[id];
-        C.momentum_z[id] = vz*C.density[id];
-        C.Energy[id]     = P_0/(gama-1.0) + 0.5*C.density[id];
-
-      }
-    }
-  }
-
-  // set exact boundaries on the +y face
-  for (k=0; k<H.nz; k++) {
-    for (j=H.ny-H.n_ghost; j<H.ny; j++) {
-      for (i=0; i<H.nx; i++) {
-
-        id = i + j*H.nx + k*H.nx*H.ny;
-        // get the (centered) x, y, and z positions at (x,y,z)
-        Get_Position(i, j, k, &x_pos, &y_pos, &z_pos);
-        if (H.nz > 1) r = sqrt(x_pos*x_pos + y_pos*y_pos+ z_pos*z_pos);
-        else r = sqrt(x_pos*x_pos + y_pos*y_pos);
-        // set the velocities
-        vx = -x_pos / r;
-        vy = -y_pos / r;
-        if (H.nz > 1) vz = -z_pos / r;
-        else vz = 0;
-        // set the conserved quantities
-        if (H.nz > 1) C.density[id] = d_0*(1.0 + H.t/r)*(1.0 + H.t/r);
-        else C.density[id]    = d_0*(1.0 + H.t/r);
-        C.momentum_x[id] = vx*C.density[id];
-        C.momentum_y[id] = vy*C.density[id];
-        C.momentum_z[id] = vz*C.density[id];
-        C.Energy[id]     = P_0/(gama-1.0) + 0.5*C.density[id];
-
+#endif
+
+  Noh_Boundary_CUDA(C.device, H.nx, H.ny, H.nz, H.n_cells, H.n_ghost, x_off, y_off, z_off, H.dx, H.dy, H.dz, H.xbound,
+                    H.ybound, H.zbound, gama, H.t);
+
+  /*
+    int i, j, k, id;
+    Real x_pos, y_pos, z_pos, r;
+    Real vx, vy, vz, d_0, P_0, P;
+    d_0 = 1.0;
+    P_0 = 1.0e-6;
+    // set exact boundaries on the +x face
+    for (k=0; k<H.nz; k++) {
+      for (j=0; j<H.ny; j++) {
+        for (i=H.nx-H.n_ghost; i<H.nx; i++) {
+          id = i + j*H.nx + k*H.nx*H.ny;
+          // get the (centered) x, y, and z positions at (x,y,z)
+          Get_Position(i, j, k, &x_pos, &y_pos, &z_pos);
+          if (H.nz > 1) r = sqrt(x_pos*x_pos + y_pos*y_pos+ z_pos*z_pos);
+          else r = sqrt(x_pos*x_pos + y_pos*y_pos);
+          // set the velocities
+          vx = -x_pos / r;
+          vy = -y_pos / r;
+          if (H.nz > 1) vz = -z_pos / r;
+          else vz = 0;
+          // set the conserved quantities
+          if (H.nz > 1) C.density[id] = d_0*(1.0 + H.t/r)*(1.0 + H.t/r);
+          else C.density[id]    = d_0*(1.0 + H.t/r);
+          C.momentum_x[id] = vx*C.density[id];
+          C.momentum_y[id] = vy*C.density[id];
+          C.momentum_z[id] = vz*C.density[id];
+          C.Energy[id]     = P_0/(gama-1.0) + 0.5*C.density[id];
+        }
       }
     }
-  }
-
-  // set exact boundaries on the +z face
-  if (H.nz > 1) {
-
-    for (k=H.nz-H.n_ghost; k<H.nz; k++) {
-      for (j=0; j<H.ny; j++) {
+    // set exact boundaries on the +y face
+    for (k=0; k<H.nz; k++) {
+      for (j=H.ny-H.n_ghost; j<H.ny; j++) {
         for (i=0; i<H.nx; i++) {
-
           id = i + j*H.nx + k*H.nx*H.ny;
           // get the (centered) x, y, and z positions at (x,y,z)
           Get_Position(i, j, k, &x_pos, &y_pos, &z_pos);
-          r = sqrt(x_pos*x_pos + y_pos*y_pos+ z_pos*z_pos);
+          if (H.nz > 1) r = sqrt(x_pos*x_pos + y_pos*y_pos+ z_pos*z_pos);
+          else r = sqrt(x_pos*x_pos + y_pos*y_pos);
           // set the velocities
           vx = -x_pos / r;
           vy = -y_pos / r;
-          vz = -z_pos / r;
+          if (H.nz > 1) vz = -z_pos / r;
+          else vz = 0;
           // set the conserved quantities
-          C.density[id]    = d_0*(1.0 + H.t/r)*(1.0 + H.t/r);
+          if (H.nz > 1) C.density[id] = d_0*(1.0 + H.t/r)*(1.0 + H.t/r);
+          else C.density[id]    = d_0*(1.0 + H.t/r);
           C.momentum_x[id] = vx*C.density[id];
           C.momentum_y[id] = vy*C.density[id];
           C.momentum_z[id] = vz*C.density[id];
           C.Energy[id]     = P_0/(gama-1.0) + 0.5*C.density[id];
-
         }
       }
     }
-
-  }
-*/
+    // set exact boundaries on the +z face
+    if (H.nz > 1) {
+      for (k=H.nz-H.n_ghost; k<H.nz; k++) {
+        for (j=0; j<H.ny; j++) {
+          for (i=0; i<H.nx; i++) {
+            id = i + j*H.nx + k*H.nx*H.ny;
+            // get the (centered) x, y, and z positions at (x,y,z)
+            Get_Position(i, j, k, &x_pos, &y_pos, &z_pos);
+            r = sqrt(x_pos*x_pos + y_pos*y_pos+ z_pos*z_pos);
+            // set the velocities
+            vx = -x_pos / r;
+            vy = -y_pos / r;
+            vz = -z_pos / r;
+            // set the conserved quantities
+            C.density[id]    = d_0*(1.0 + H.t/r)*(1.0 + H.t/r);
+            C.momentum_x[id] = vx*C.density[id];
+            C.momentum_y[id] = vy*C.density[id];
+            C.momentum_z[id] = vz*C.density[id];
+            C.Energy[id]     = P_0/(gama-1.0) + 0.5*C.density[id];
+          }
+        }
+      }
+    }
+  */
 }
-
-
diff --git a/src/grid/cuda_boundaries.cu b/src/grid/cuda_boundaries.cu
index b8cff7b2f..baf846d3c 100644
--- a/src/grid/cuda_boundaries.cu
+++ b/src/grid/cuda_boundaries.cu
@@ -1,74 +1,80 @@
-#include "../utils/gpu.hpp"
 #include "../global/global.h"
 #include "../global/global_cuda.h"
+#include "../utils/cuda_utilities.h"
+#include "../utils/gpu.hpp"
 #include "cuda_boundaries.h"
 
-__device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real *a, int &idMag);
+__device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real *a);
 
-__device__ int SetBoundaryMapping(int ig, int jg, int kg, Real *a, int flags[],int nx, int ny, int nz, int n_ghost,  int &magneticIdx);
+__device__ int SetBoundaryMapping(int ig, int jg, int kg, Real *a, int flags[], int nx, int ny, int nz, int n_ghost);
 
-__global__ void PackBuffers3DKernel(Real * buffer, Real * c_head, int isize, int jsize, int ksize, int nx, int ny, int idxoffset, int buffer_ncells, int n_fields, int n_cells)
+__global__ void PackBuffers3DKernel(Real *buffer, Real *c_head, int isize, int jsize, int ksize, int nx, int ny,
+                                    int idxoffset, int buffer_ncells, int n_fields, int n_cells)
 {
-  int id,i,j,k,idx,ii;
+  int id, i, j, k, idx, ii;
   id = threadIdx.x + blockIdx.x * blockDim.x;
-  if (id >= buffer_ncells){
+  if (id >= buffer_ncells) {
     return;
   }
-  k = id/(isize*jsize);
-  j = (id - k*isize*jsize)/isize;
-  i = id - k*isize*jsize - j*isize;
-  idx  = i + (j+k*ny)*nx + idxoffset;
+  k   = id / (isize * jsize);
+  j   = (id - k * isize * jsize) / isize;
+  i   = id - k * isize * jsize - j * isize;
+  idx = i + (j + k * ny) * nx + idxoffset;
   // idxoffset contains offset terms from
   // idx = (i+ioffset) + (j+joffset)*H.nx + (k+koffset)*H.nx*H.ny;
-  for (ii=0; ii<n_fields; ii++) {
-    *(buffer + id + ii*buffer_ncells) = c_head[idx + ii*n_cells];
+  for (ii = 0; ii < n_fields; ii++) {
+    *(buffer + id + ii * buffer_ncells) = c_head[idx + ii * n_cells];
   }
-
 }
 
-
-void PackBuffers3D(Real * buffer, Real * c_head, int nx, int ny, int n_fields, int n_cells, int idxoffset, int isize, int jsize, int ksize){
-  int buffer_ncells = isize*jsize*ksize;
-  dim3 dim1dGrid((buffer_ncells+TPB-1)/TPB, 1, 1);
+void PackBuffers3D(Real *buffer, Real *c_head, int nx, int ny, int n_fields, int n_cells, int idxoffset, int isize,
+                   int jsize, int ksize)
+{
+  int buffer_ncells = isize * jsize * ksize;
+  dim3 dim1dGrid((buffer_ncells + TPB - 1) / TPB, 1, 1);
   dim3 dim1dBlock(TPB, 1, 1);
-  hipLaunchKernelGGL(PackBuffers3DKernel,dim1dGrid,dim1dBlock,0,0,buffer,c_head,isize,jsize,ksize,nx,ny,idxoffset,buffer_ncells,n_fields,n_cells);
-  CHECK(cudaDeviceSynchronize());
+  hipLaunchKernelGGL(PackBuffers3DKernel, dim1dGrid, dim1dBlock, 0, 0, buffer, c_head, isize, jsize, ksize, nx, ny,
+                     idxoffset, buffer_ncells, n_fields, n_cells);
+  GPU_Error_Check(cudaDeviceSynchronize());
 }
 
-
-__global__ void UnpackBuffers3DKernel(Real * buffer, Real * c_head, int isize, int jsize, int ksize, int nx, int ny, int idxoffset, int buffer_ncells, int n_fields, int n_cells)
+__global__ void UnpackBuffers3DKernel(Real *buffer, Real *c_head, int isize, int jsize, int ksize, int nx, int ny,
+                                      int idxoffset, int buffer_ncells, int n_fields, int n_cells)
 {
-  int id,i,j,k,idx,ii;
+  int id, i, j, k, idx, ii;
   id = threadIdx.x + blockIdx.x * blockDim.x;
-  if (id >= buffer_ncells){
+  if (id >= buffer_ncells) {
     return;
   }
-  k = id/(isize*jsize);
-  j = (id - k*isize*jsize)/isize;
-  i = id - k*isize*jsize - j*isize;
-  idx  = i + (j+k*ny)*nx + idxoffset;
-  for (ii=0; ii<n_fields; ii++) {
-    c_head[idx + ii*n_cells] = *(buffer + id + ii*buffer_ncells);
+  k   = id / (isize * jsize);
+  j   = (id - k * isize * jsize) / isize;
+  i   = id - k * isize * jsize - j * isize;
+  idx = i + (j + k * ny) * nx + idxoffset;
+  for (ii = 0; ii < n_fields; ii++) {
+    c_head[idx + ii * n_cells] = *(buffer + id + ii * buffer_ncells);
   }
-
 }
 
-void UnpackBuffers3D(Real * buffer, Real * c_head, int nx, int ny, int n_fields, int n_cells, int idxoffset, int isize, int jsize, int ksize){
-//void UnpackBuffers3D(Real * buffer, Real * c_head, int isize, int jsize, int ksize, int nx, int ny, int idxoffset, int offset, int n_fields, int n_cells){
-  int buffer_ncells = isize*jsize*ksize;
-  dim3 dim1dGrid((buffer_ncells+TPB-1)/TPB, 1, 1);
+void UnpackBuffers3D(Real *buffer, Real *c_head, int nx, int ny, int n_fields, int n_cells, int idxoffset, int isize,
+                     int jsize, int ksize)
+{
+  // void UnpackBuffers3D(Real * buffer, Real * c_head, int isize, int jsize,
+  // int ksize, int nx, int ny, int idxoffset, int offset, int n_fields, int
+  // n_cells){
+  int buffer_ncells = isize * jsize * ksize;
+  dim3 dim1dGrid((buffer_ncells + TPB - 1) / TPB, 1, 1);
   dim3 dim1dBlock(TPB, 1, 1);
-  hipLaunchKernelGGL(UnpackBuffers3DKernel,dim1dGrid,dim1dBlock,0,0,buffer,c_head,isize,jsize,ksize,nx,ny,idxoffset,buffer_ncells,n_fields,n_cells);
+  hipLaunchKernelGGL(UnpackBuffers3DKernel, dim1dGrid, dim1dBlock, 0, 0, buffer, c_head, isize, jsize, ksize, nx, ny,
+                     idxoffset, buffer_ncells, n_fields, n_cells);
 }
 
-__global__ void SetGhostCellsKernel(Real * c_head,
-				     int nx, int ny, int nz, int n_fields, int n_cells, int n_ghost,
-				     int f0, int f1, int f2, int f3, int f4, int f5,
-				     int isize, int jsize, int ksize,
-				     int imin, int jmin, int kmin, int dir){
-  int id,i,j,k,gidx,idx,ii, magneticIdx;
-  Real a[3] = {1.,1.,1.};
-  int flags[6] = {f0,f1,f2,f3,f4,f5};
+__global__ void SetGhostCellsKernel(Real *c_head, int nx, int ny, int nz, int n_fields, int n_cells, int n_ghost,
+                                    int f0, int f1, int f2, int f3, int f4, int f5, int isize, int jsize, int ksize,
+                                    int imin, int jmin, int kmin, int dir)
+{
+  int id, i, j, k, gidx, idx, ii;
+  Real a[3]    = {1., 1., 1.};
+  int flags[6] = {f0, f1, f2, f3, f4, f5};
 
   // using thread ID calculate which ghost cell this is
   // calculate which real cell using SetBoundaryMapping
@@ -80,262 +86,205 @@ __global__ void SetGhostCellsKernel(Real * c_head,
   id = threadIdx.x + blockIdx.x * blockDim.x;
 
   // not true i,j,k but relative i,j,k
-  k = id/(isize*jsize);
-  j = (id - k*isize*jsize)/isize;
-  i = id - k*isize*jsize - j*isize;
-  if (id>=isize*jsize*ksize){
+  k = id / (isize * jsize);
+  j = (id - k * isize * jsize) / isize;
+  i = id - k * isize * jsize - j * isize;
+  if (id >= isize * jsize * ksize) {
     return;
   }
   // true i,j,k conversion
   i += imin;
   j += jmin;
   k += kmin;
-  gidx = i + j*nx + k*nx*ny;
+  gidx = i + j * nx + k * nx * ny;
 
   // calculate idx (index of real cell) and a[:] for reflection
-  idx = SetBoundaryMapping(i,j,k,&a[0],flags,nx,ny,nz,n_ghost,magneticIdx);
-
-  if (idx>=0){
-    for (ii=0; ii<n_fields; ii++) {
-      #ifdef  MHD
-        // Choose which index to use, the one for magnetic fields or not
-        int index = ((5+NSCALARS <= ii) and ( ii <= 7+NSCALARS))? magneticIdx: idx;
-        c_head[gidx + ii*n_cells] = c_head[index + ii*n_cells];
-      #else // MHD not defined
-        c_head[gidx + ii*n_cells] = c_head[idx + ii*n_cells];
-      #endif  //MHD
+  idx = SetBoundaryMapping(i, j, k, &a[0], flags, nx, ny, nz, n_ghost);
+
+  if (idx >= 0) {
+    for (ii = 0; ii < n_fields; ii++) {
+      c_head[gidx + ii * n_cells] = c_head[idx + ii * n_cells];
     }
     // momentum correction for reflection
-    // these are set to -1 whenever ghost cells in a direction are in a reflective boundary condition
-    if (flags[0]==2 || flags[1]==2){
+    // these are set to -1 whenever ghost cells in a direction are in a
+    // reflective boundary condition
+    if (flags[0] == 2 || flags[1] == 2) {
       c_head[gidx + n_cells] *= a[0];
     }
-    if (flags[2]==2 || flags[3]==2){
-      c_head[gidx + 2*n_cells] *= a[1];
+    if (flags[2] == 2 || flags[3] == 2) {
+      c_head[gidx + 2 * n_cells] *= a[1];
     }
-    if (flags[4]==2 || flags[5]==2){
-      c_head[gidx + 3*n_cells] *= a[2];
+    if (flags[4] == 2 || flags[5] == 2) {
+      c_head[gidx + 3 * n_cells] *= a[2];
     }
+
+#ifndef MHD
     // energy and momentum correction for transmission
     // Diode: only allow outflow
-    if (flags[dir] == 3){
+    if (flags[dir] == 3) {
       //
-      int momdex = gidx + (dir/2+1)*n_cells;
+      int momdex = gidx + (dir / 2 + 1) * n_cells;
       // (X) Dir 0,1 -> Mom 1 -> c_head[gidx+1*n_cells]
       // (Y) Dir 2,3 -> Mom 2 -> c_head[gidx+2*n_cells]
       // (Z) Dir 4,5 -> Mom 3 -> c_head[gidx+3*n_cells]
       // If a momentum is set to 0, subtract its kinetic energy [gidx+4*n_cells]
-      if (dir%2 == 0){
-	// Direction 0,2,4 are left-side, don't allow inflow with positive momentum
-	if (c_head[momdex] > 0.0) {
-	  c_head[gidx+4*n_cells] -= 0.5*(c_head[momdex]*c_head[momdex])/c_head[gidx];
-	  c_head[momdex] = 0.0;
-	}
+      if (dir % 2 == 0) {
+        // Direction 0,2,4 are left-side, don't allow inflow with positive
+        // momentum
+        if (c_head[momdex] > 0.0) {
+          c_head[gidx + 4 * n_cells] -= 0.5 * (c_head[momdex] * c_head[momdex]) / c_head[gidx];
+          c_head[momdex] = 0.0;
+        }
       } else {
-	// Direction 1,3,5 are right-side, don't allow inflow with negative momentum
-	if (c_head[momdex] < 0.0) {
-	  c_head[gidx+4*n_cells] -= 0.5*(c_head[momdex]*c_head[momdex])/c_head[gidx];
-	  c_head[momdex] = 0.0;
-	}
+        // Direction 1,3,5 are right-side, don't allow inflow with negative
+        // momentum
+        if (c_head[momdex] < 0.0) {
+          c_head[gidx + 4 * n_cells] -= 0.5 * (c_head[momdex] * c_head[momdex]) / c_head[gidx];
+          c_head[momdex] = 0.0;
+        }
       }
-    }//end energy correction for transmissive boundaries
-  }//end idx>=0
-}//end function
-
-void SetGhostCells(Real * c_head,
-		    int nx, int ny, int nz, int n_fields, int n_cells, int n_ghost, int flags[],
-		    int isize, int jsize, int ksize,
-		    int imin, int jmin, int kmin, int dir)
+    }   // end energy correction for transmissive boundaries
+#endif  // not MHD
+  }     // end idx>=0
+}  // end function
+
+void SetGhostCells(Real *c_head, int nx, int ny, int nz, int n_fields, int n_cells, int n_ghost, int flags[], int isize,
+                   int jsize, int ksize, int imin, int jmin, int kmin, int dir)
 {
-  dim3 dim1dGrid((isize*jsize*ksize+TPB-1)/TPB, 1, 1);
+  dim3 dim1dGrid((isize * jsize * ksize + TPB - 1) / TPB, 1, 1);
   dim3 dim1dBlock(TPB, 1, 1);
-  hipLaunchKernelGGL(SetGhostCellsKernel,dim1dGrid,dim1dBlock,0,0,c_head,
-		     nx,ny,nz,n_fields,n_cells,n_ghost,
-		     flags[0],flags[1],flags[2],flags[3],flags[4],flags[5],
-		     isize,jsize,ksize,imin,jmin,kmin,dir);
-
+  hipLaunchKernelGGL(SetGhostCellsKernel, dim1dGrid, dim1dBlock, 0, 0, c_head, nx, ny, nz, n_fields, n_cells, n_ghost,
+                     flags[0], flags[1], flags[2], flags[3], flags[4], flags[5], isize, jsize, ksize, imin, jmin, kmin,
+                     dir);
 }
 
-__device__ int SetBoundaryMapping(int ig, int jg, int kg, Real *a, int flags[], int nx, int ny, int nz, int n_ghost, int &magneticIdx){
+__device__ int SetBoundaryMapping(int ig, int jg, int kg, Real *a, int flags[], int nx, int ny, int nz, int n_ghost)
+{
   // nx, ny, nz, n_ghost
   /* 1D */
-  // irMag, jrMag, krMag are the magnetic indices
-  int ir, jr, kr, irMag, jrMag, krMag, idx;
-  ir=jr=kr=irMag=jrMag=krMag=idx=magneticIdx=0;
-  if (nx>1) {
-
+  int ir, jr, kr, idx;
+  ir = jr = kr = idx = 0;
+  if (nx > 1) {
     // set index on -x face
     if (ig < n_ghost) {
-      ir = FindIndex(ig, nx, flags[0], 0, n_ghost, &a[0], irMag);
+      ir = FindIndex(ig, nx, flags[0], 0, n_ghost, &a[0]);
     }
     // set index on +x face
-    else if (ig >= nx-n_ghost) {
-      ir = FindIndex(ig, nx, flags[1], 1, n_ghost, &a[0], irMag);
+    else if (ig >= nx - n_ghost) {
+      ir = FindIndex(ig, nx, flags[1], 1, n_ghost, &a[0]);
     }
     // set i index for multi-D problems
     else {
       ir = ig;
-      #ifdef  MHD
-        irMag = ig;
-      #endif  //MHD
     }
 
     // if custom x boundaries are needed, set index to -1 and return
     if (ir < 0) {
-      #ifdef  MHD
-        magneticIdx = -1;
-      #endif  //MHD
       return idx = -1;
     }
 
     // otherwise add i index to ghost cell mapping
     idx += ir;
-    #ifdef  MHD
-      magneticIdx += irMag;
-    #endif  //MHD
-
   }
 
   /* 2D */
   if (ny > 1) {
-
     // set index on -y face
     if (jg < n_ghost) {
-      jr = FindIndex(jg, ny, flags[2], 0, n_ghost, &a[1], jrMag);
+      jr = FindIndex(jg, ny, flags[2], 0, n_ghost, &a[1]);
     }
     // set index on +y face
-    else if (jg >= ny-n_ghost) {
-      jr = FindIndex(jg, ny, flags[3], 1, n_ghost, &a[1], jrMag);
+    else if (jg >= ny - n_ghost) {
+      jr = FindIndex(jg, ny, flags[3], 1, n_ghost, &a[1]);
     }
     // set j index for multi-D problems
     else {
       jr = jg;
-      #ifdef  MHD
-        jrMag = jg;
-      #endif  //MHD
     }
 
     // if custom y boundaries are needed, set index to -1 and return
     if (jr < 0) {
-      #ifdef  MHD
-        magneticIdx = -1;
-      #endif  //MHD
       return idx = -1;
     }
 
     // otherwise add j index to ghost cell mapping
-    idx += nx*jr;
-    #ifdef  MHD
-      magneticIdx += nx*jrMag;
-    #endif  //MHD
-
+    idx += nx * jr;
   }
 
   /* 3D */
   if (nz > 1) {
-
     // set index on -z face
     if (kg < n_ghost) {
-      kr = FindIndex(kg, nz, flags[4], 0, n_ghost, &a[2], krMag);
+      kr = FindIndex(kg, nz, flags[4], 0, n_ghost, &a[2]);
     }
     // set index on +z face
-    else if (kg >= nz-n_ghost) {
-      kr = FindIndex(kg, nz, flags[5], 1, n_ghost, &a[2], krMag);
+    else if (kg >= nz - n_ghost) {
+      kr = FindIndex(kg, nz, flags[5], 1, n_ghost, &a[2]);
     }
     // set k index for multi-D problems
     else {
       kr = kg;
-      #ifdef  MHD
-        krMag = kg;
-      #endif  //MHD
     }
 
     // if custom z boundaries are needed, set index to -1 and return
     if (kr < 0) {
-      #ifdef  MHD
-        magneticIdx = -1;
-      #endif  //MHD
       return idx = -1;
     }
 
     // otherwise add k index to ghost cell mapping
-    idx += nx*ny*kr;
-    #ifdef  MHD
-      magneticIdx += nx*ny*krMag;
-    #endif  //MHD
+    idx += nx * ny * kr;
   }
   return idx;
 }
 
-__device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real *a, int &idMag){
+__device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real *a)
+{
   int id;
 
   // lower face
-  if (face==0)
-  {
-    switch(flag)
-    {
+  if (face == 0) {
+    switch (flag) {
       // periodic
       case 1:
-        id = ig+nx-2*n_ghost;
-        #ifdef  MHD
-          idMag = id;
-        #endif  //MHD
+        id = ig + nx - 2 * n_ghost;
         break;
       // reflective
       case 2:
-        id = 2*n_ghost-ig-1;
+        id   = 2 * n_ghost - ig - 1;
         *(a) = -1.0;
-        #ifdef  MHD
-          idMag = id - 1;
-        #endif  //MHD
         break;
       // transmissive
       case 3:
         id = n_ghost;
-        #ifdef  MHD
-          idMag = id - 1;
-        #endif  //MHD
         break;
       // custom
       case 4:
         id = -1;
-        #ifdef  MHD
-          idMag = -1;
-        #endif  //MHD
         break;
       // MPI
       case 5:
         id = ig;
-        #ifdef  MHD
-          idMag = id;
-        #endif  //MHD
         break;
       // default is periodic
       default:
-        id = ig+nx-2*n_ghost;
-        #ifdef  MHD
-          idMag = id;
-        #endif  //MHD
+        id = ig + nx - 2 * n_ghost;
     }
   }
   // upper face
-  else
-  {
-    switch(flag)
-    {
+  else {
+    switch (flag) {
       // periodic
       case 1:
-        id = ig-nx+2*n_ghost;
+        id = ig - nx + 2 * n_ghost;
         break;
       // reflective
       case 2:
-        id = 2*(nx-n_ghost)-ig-1;
+        id   = 2 * (nx - n_ghost) - ig - 1;
         *(a) = -1.0;
-      break;
+        break;
       // transmissive
       case 3:
-        id = nx-n_ghost-1;
+        id = nx - n_ghost - 1;
         break;
       // custom
       case 4:
@@ -347,27 +296,63 @@ __device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real *
         break;
       // default is periodic
       default:
-        id = ig-nx+2*n_ghost;
+        id = ig - nx + 2 * n_ghost;
     }
-    #ifdef  MHD
-      idMag = id;
-    #endif  //MHD
   }
   return id;
 }
 
+__global__ void Wind_Boundary_kernel(Real *c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off,
+                                     int y_off, int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound,
+                                     Real zbound, Real gamma, Real t)
+{
+  int id, xid, yid, zid, gid;
+  Real n_0, T_0;
+  Real mu = 0.6;
+  Real vx, vy, vz, d_0, P_0;
+
+  n_0 = 1e-2;  // same value as n_bg in cloud initial condition function (cm^-3)
+  T_0 = 3e6;   // same value as T_bg in cloud initial condition function (K)
+
+  // same values as rho_bg and p_bg in cloud initial condition function
+  d_0 = n_0 * mu * MP / DENSITY_UNIT;
+  P_0 = n_0 * KB * T_0 / PRESSURE_UNIT;
+
+  vx = 100 * TIME_UNIT / KPC;  // km/s * (cholla unit conversion)
+  vy = 0.0;
+  vz = 0.0;
 
-__global__ void Noh_Boundary_kernel(Real * c_device,
-				     int nx, int ny, int nz, int n_cells, int n_ghost,
-             int x_off, int y_off, int z_off,
-             Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, Real t)
+  // calculate ghost cell ID and i,j,k in GPU grid
+  id = threadIdx.x + blockIdx.x * blockDim.x;
+
+  // not true i,j,k but relative i,j,k in the GPU grid
+  cuda_utilities::compute3DIndices(id, n_ghost, ny, xid, yid, zid);
+
+  // map thread id to ghost cell id
+  xid += 0;  // -x boundary
+  gid = xid + yid * nx + zid * nx * ny;
+
+  if (xid <= n_ghost && xid < nx && yid < ny && zid < nz) {
+    // set conserved variables
+    c_device[gid]               = d_0;
+    c_device[gid + 1 * n_cells] = vx * d_0;
+    c_device[gid + 2 * n_cells] = vy * d_0;
+    c_device[gid + 3 * n_cells] = vz * d_0;
+    c_device[gid + 4 * n_cells] = P_0 / (gamma - 1.0) + 0.5 * d_0 * (vx * vx + vy * vy + vz * vz);
+  }
+  __syncthreads();
+}
+
+__global__ void Noh_Boundary_kernel(Real *c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off,
+                                    int y_off, int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound,
+                                    Real zbound, Real gamma, Real t)
 {
-  int id,xid,yid,zid,gid;
+  int id, xid, yid, zid, gid;
   Real x_pos, y_pos, z_pos, r;
   Real vx, vy, vz, d_0, P_0;
 
   d_0 = 1.0;
-  P_0 = 1.0e-6;  
+  P_0 = 1.0e-6;
 
   // calculate ghost cell ID and i,j,k in GPU grid
   id = threadIdx.x + blockIdx.x * blockDim.x;
@@ -381,40 +366,48 @@ __global__ void Noh_Boundary_kernel(Real * c_device,
   // ksize = nz;
 
   // not true i,j,k but relative i,j,k in the GPU grid
-  zid = id/(isize*jsize);
-  yid = (id - zid*isize*jsize)/isize;
-  xid = id - zid*isize*jsize - yid*isize;
+  zid = id / (isize * jsize);
+  yid = (id - zid * isize * jsize) / isize;
+  xid = id - zid * isize * jsize - yid * isize;
 
   // map thread id to ghost cell id
-  xid += nx-n_ghost; // +x boundary
-  gid = xid + yid*nx + zid*nx*ny;
-
-  if (xid >= nx-n_ghost && xid < nx && yid < ny && zid < nz) {
+  xid += nx - n_ghost;  // +x boundary
+  gid = xid + yid * nx + zid * nx * ny;
 
-    // use the subgrid offset and global boundaries to calculate absolute positions on the grid
-    x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound;
-    y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound;
-    z_pos = (z_off + zid - n_ghost + 0.5)*dz + zbound;
+  if (xid >= nx - n_ghost && xid < nx && yid < ny && zid < nz) {
+    // use the subgrid offset and global boundaries to calculate absolute
+    // positions on the grid
+    x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound;
+    y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound;
+    z_pos = (z_off + zid - n_ghost + 0.5) * dz + zbound;
 
     // for 2D calculate polar r
-    if (nz == 1) r = sqrt(x_pos*x_pos + y_pos*y_pos);
-    // for 3D calculate spherical r
-    else r = sqrt(x_pos*x_pos + y_pos*y_pos + z_pos*z_pos);
+    if (nz == 1) {
+      r = sqrt(x_pos * x_pos + y_pos * y_pos);
+      // for 3D calculate spherical r
+    } else {
+      r = sqrt(x_pos * x_pos + y_pos * y_pos + z_pos * z_pos);
+    }
 
     // calculate the velocities
     vx = -x_pos / r;
     vy = -y_pos / r;
-    if (nz > 1) vz = -z_pos / r;
-    else vz = 0;
+    if (nz > 1) {
+      vz = -z_pos / r;
+    } else {
+      vz = 0;
+    }
     // set the conserved quantities
-    if (nz > 1) c_device[gid] = d_0*(1.0 + t/r)*(1.0 + t/r);
-    else c_device[gid]    = d_0*(1.0 + t/r);
-    c_device[gid+1*n_cells] = vx*c_device[gid];
-    c_device[gid+2*n_cells] = vy*c_device[gid];
-    c_device[gid+3*n_cells] = vz*c_device[gid];
-    c_device[gid+4*n_cells] = P_0/(gamma-1.0) + 0.5*c_device[gid];
+    if (nz > 1) {
+      c_device[gid] = d_0 * (1.0 + t / r) * (1.0 + t / r);
+    } else {
+      c_device[gid] = d_0 * (1.0 + t / r);
+    }
+    c_device[gid + 1 * n_cells] = vx * c_device[gid];
+    c_device[gid + 2 * n_cells] = vy * c_device[gid];
+    c_device[gid + 3 * n_cells] = vz * c_device[gid];
+    c_device[gid + 4 * n_cells] = P_0 / (gamma - 1.0) + 0.5 * c_device[gid];
   }
-  __syncthreads();  
 
   // +y boundary next
   isize = nx;
@@ -422,90 +415,107 @@ __global__ void Noh_Boundary_kernel(Real * c_device,
   // ksize = nz;
 
   // not true i,j,k but relative i,j,k
-  zid = id/(isize*jsize);
-  yid = (id - zid*isize*jsize)/isize;
-  xid = id - zid*isize*jsize - yid*isize;
+  zid = id / (isize * jsize);
+  yid = (id - zid * isize * jsize) / isize;
+  xid = id - zid * isize * jsize - yid * isize;
 
   // map thread id to ghost cell id
-  yid += ny-n_ghost; // +y boundary
-  gid = xid + yid*nx + zid*nx*ny;
+  yid += ny - n_ghost;  // +y boundary
+  gid = xid + yid * nx + zid * nx * ny;
 
-  if (xid < nx && yid >= ny-n_ghost && yid < ny && zid < nz) {
-
-    // use the subgrid offset and global boundaries to calculate absolute positions on the grid
-    x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound;
-    y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound;
-    z_pos = (z_off + zid - n_ghost + 0.5)*dz + zbound;
+  if (xid < nx && yid >= ny - n_ghost && yid < ny && zid < nz) {
+    // use the subgrid offset and global boundaries to calculate absolute
+    // positions on the grid
+    x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound;
+    y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound;
+    z_pos = (z_off + zid - n_ghost + 0.5) * dz + zbound;
 
     // for 2D calculate polar r
-    if (nz == 1) r = sqrt(x_pos*x_pos + y_pos*y_pos);
-    // for 3D, calculate spherical r
-    else r = sqrt(x_pos*x_pos + y_pos*y_pos + z_pos*z_pos);
+    if (nz == 1) {
+      r = sqrt(x_pos * x_pos + y_pos * y_pos);
+      // for 3D, calculate spherical r
+    } else {
+      r = sqrt(x_pos * x_pos + y_pos * y_pos + z_pos * z_pos);
+    }
 
     // calculate the velocities
     vx = -x_pos / r;
     vy = -y_pos / r;
-    if (nz > 1) vz = -z_pos / r;
-    else vz = 0;
+    if (nz > 1) {
+      vz = -z_pos / r;
+    } else {
+      vz = 0;
+    }
     // set the conserved quantities
-    if (nz > 1) c_device[gid] = d_0*(1.0 + t/r)*(1.0 + t/r);
-    else c_device[gid]    = d_0*(1.0 + t/r);
-    c_device[gid+1*n_cells] = vx*c_device[gid];
-    c_device[gid+2*n_cells] = vy*c_device[gid];
-    c_device[gid+3*n_cells] = vz*c_device[gid];
-    c_device[gid+4*n_cells] = P_0/(gamma-1.0) + 0.5*c_device[gid];
-  } 
-  __syncthreads();  
+    if (nz > 1) {
+      c_device[gid] = d_0 * (1.0 + t / r) * (1.0 + t / r);
+    } else {
+      c_device[gid] = d_0 * (1.0 + t / r);
+    }
+    c_device[gid + 1 * n_cells] = vx * c_device[gid];
+    c_device[gid + 2 * n_cells] = vy * c_device[gid];
+    c_device[gid + 3 * n_cells] = vz * c_device[gid];
+    c_device[gid + 4 * n_cells] = P_0 / (gamma - 1.0) + 0.5 * c_device[gid];
+  }
+  __syncthreads();
 
   // +z boundary last (only if 3D)
-  if (nz == 1) return;
+  if (nz == 1) {
+    return;
+  }
 
   isize = nx;
   jsize = ny;
   // ksize = n_ghost;
 
   // not true i,j,k but relative i,j,k
-  zid = id/(isize*jsize);
-  yid = (id - zid*isize*jsize)/isize;
-  xid = id - zid*isize*jsize - yid*isize;
+  zid = id / (isize * jsize);
+  yid = (id - zid * isize * jsize) / isize;
+  xid = id - zid * isize * jsize - yid * isize;
 
   // map thread id to ghost cell id
-  zid += nz-n_ghost; // +z boundary
-  gid = xid + yid*nx + zid*nx*ny;
-
-  if (xid < nx && yid < ny && zid >= nz-n_ghost && zid < nz) {
+  zid += nz - n_ghost;  // +z boundary
+  gid = xid + yid * nx + zid * nx * ny;
 
-    // use the subgrid offset and global boundaries to calculate absolute positions on the grid
-    x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound;
-    y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound;
-    z_pos = (z_off + zid - n_ghost + 0.5)*dz + zbound;
+  if (xid < nx && yid < ny && zid >= nz - n_ghost && zid < nz) {
+    // use the subgrid offset and global boundaries to calculate absolute
+    // positions on the grid
+    x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound;
+    y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound;
+    z_pos = (z_off + zid - n_ghost + 0.5) * dz + zbound;
 
     // for 2D calculate polar r
-    if (nz == 1) r = sqrt(x_pos*x_pos + y_pos*y_pos);
-    // for 3D, calculate spherical r
-    else r = sqrt(x_pos*x_pos + y_pos*y_pos + z_pos*z_pos);
+    if (nz == 1) {
+      r = sqrt(x_pos * x_pos + y_pos * y_pos);
+      // for 3D, calculate spherical r
+    } else {
+      r = sqrt(x_pos * x_pos + y_pos * y_pos + z_pos * z_pos);
+    }
 
     // calculate the velocities
     vx = -x_pos / r;
     vy = -y_pos / r;
-    if (nz > 1) vz = -z_pos / r;
-    else vz = 0;
+    if (nz > 1) {
+      vz = -z_pos / r;
+    } else {
+      vz = 0;
+    }
     // set the conserved quantities
-    if (nz > 1) c_device[gid] = d_0*(1.0 + t/r)*(1.0 + t/r);
-    else c_device[gid]    = d_0*(1.0 + t/r);
-    c_device[gid+1*n_cells] = vx*c_device[gid];
-    c_device[gid+2*n_cells] = vy*c_device[gid];
-    c_device[gid+3*n_cells] = vz*c_device[gid];
-    c_device[gid+4*n_cells] = P_0/(gamma-1.0) + 0.5*c_device[gid];
-  } 
+    if (nz > 1) {
+      c_device[gid] = d_0 * (1.0 + t / r) * (1.0 + t / r);
+    } else {
+      c_device[gid] = d_0 * (1.0 + t / r);
+    }
+    c_device[gid + 1 * n_cells] = vx * c_device[gid];
+    c_device[gid + 2 * n_cells] = vy * c_device[gid];
+    c_device[gid + 3 * n_cells] = vz * c_device[gid];
+    c_device[gid + 4 * n_cells] = P_0 / (gamma - 1.0) + 0.5 * c_device[gid];
+  }
 }
 
-
-void Noh_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, int n_ghost,
-                       int x_off, int y_off, int z_off, Real dx, Real dy, Real dz,
-                       Real xbound, Real ybound, Real zbound, Real gamma, Real t)
+void Wind_Boundary_CUDA(Real *c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off, int y_off,
+                        int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, Real t)
 {
-
   // determine the size of the grid to launch
   // need at least as many threads as the largest boundary face
   // current implementation assumes the test is run on a cube...
@@ -514,16 +524,29 @@ void Noh_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, int
   jsize = ny;
   ksize = nz;
 
-  dim3 dim1dGrid((isize*jsize*ksize+TPB-1)/TPB, 1, 1);
+  dim3 dim1dGrid((isize * jsize * ksize + TPB - 1) / TPB, 1, 1);
   dim3 dim1dBlock(TPB, 1, 1);
 
   // launch the boundary kernel
-  hipLaunchKernelGGL(Noh_Boundary_kernel,dim1dGrid,dim1dBlock,0,0,c_device,
-		     nx,ny,nz,n_cells,n_ghost,
-         x_off,y_off,z_off,dx,dy,dz,xbound,ybound,zbound,gamma,t);
-
-
-
+  hipLaunchKernelGGL(Wind_Boundary_kernel, dim1dGrid, dim1dBlock, 0, 0, c_device, nx, ny, nz, n_cells, n_ghost, x_off,
+                     y_off, z_off, dx, dy, dz, xbound, ybound, zbound, gamma, t);
 }
 
+void Noh_Boundary_CUDA(Real *c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off, int y_off,
+                       int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, Real t)
+{
+  // determine the size of the grid to launch
+  // need at least as many threads as the largest boundary face
+  // current implementation assumes the test is run on a cube...
+  int isize, jsize, ksize;
+  isize = n_ghost;
+  jsize = ny;
+  ksize = nz;
+
+  dim3 dim1dGrid((isize * jsize * ksize + TPB - 1) / TPB, 1, 1);
+  dim3 dim1dBlock(TPB, 1, 1);
 
+  // launch the boundary kernel
+  hipLaunchKernelGGL(Noh_Boundary_kernel, dim1dGrid, dim1dBlock, 0, 0, c_device, nx, ny, nz, n_cells, n_ghost, x_off,
+                     y_off, z_off, dx, dy, dz, xbound, ybound, zbound, gamma, t);
+}
\ No newline at end of file
diff --git a/src/grid/cuda_boundaries.h b/src/grid/cuda_boundaries.h
index f7212401a..bbf0a5ab8 100644
--- a/src/grid/cuda_boundaries.h
+++ b/src/grid/cuda_boundaries.h
@@ -1,21 +1,23 @@
-#ifdef CUDA
-#include "../utils/gpu.hpp"
 #include "../global/global.h"
 #include "../global/global_cuda.h"
+#include "../utils/gpu.hpp"
 
-//void PackBuffers3D(Real * buffer, Real * c_head, int isize, int jsize, int ksize, int nx, int ny, int idxoffset, int offset, int n_fields, int n_cells);
-void PackBuffers3D(Real * buffer, Real * c_head, int nx, int ny, int n_fields, int n_cells, int idxoffset, int isize, int jsize, int ksize);
+// void PackBuffers3D(Real * buffer, Real * c_head, int isize, int jsize, int
+// ksize, int nx, int ny, int idxoffset, int offset, int n_fields, int n_cells);
+void PackBuffers3D(Real* buffer, Real* c_head, int nx, int ny, int n_fields, int n_cells, int idxoffset, int isize,
+                   int jsize, int ksize);
 
-void UnpackBuffers3D(Real * buffer, Real * c_head, int nx, int ny, int n_fields, int n_cells, int idxoffset, int isize, int jsize, int ksize);
-//void UnpackBuffers3D(Real * buffer, Real * c_head, int isize, int jsize, int ksize, int nx, int ny, int idxoffset, int offset, int n_fields, int n_cells);
+void UnpackBuffers3D(Real* buffer, Real* c_head, int nx, int ny, int n_fields, int n_cells, int idxoffset, int isize,
+                     int jsize, int ksize);
+// void UnpackBuffers3D(Real * buffer, Real * c_head, int isize, int jsize, int
+// ksize, int nx, int ny, int idxoffset, int offset, int n_fields, int n_cells);
 
-void SetGhostCells(Real * c_head,
-		    int nx, int ny, int nz, int n_fields, int n_cells, int n_ghost, int flags[],
-		    int isize, int jsize, int ksize,
-		    int imin, int jmin, int kmin, int dir);
+void SetGhostCells(Real* c_head, int nx, int ny, int nz, int n_fields, int n_cells, int n_ghost, int flags[], int isize,
+                   int jsize, int ksize, int imin, int jmin, int kmin, int dir);
 
-void Noh_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, int n_ghost,
-                       int x_off, int y_off, int z_off, Real dx, Real dy, Real dz,
-                       Real xbound, Real ybound, Real zbound, Real gamma, Real t);        
+void Wind_Boundary_CUDA(Real* c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off, int y_off,
+                        int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma,
+                        Real t);
 
-#endif
+void Noh_Boundary_CUDA(Real* c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off, int y_off,
+                       int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, Real t);
diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp
index 03c1dc7c1..ef4d57928 100644
--- a/src/grid/grid3D.cpp
+++ b/src/grid/grid3D.cpp
@@ -1,43 +1,46 @@
 /*! \file grid3D.cpp
  *  \brief Definitions of the Grid3D class */
-#include <stdlib.h>
 #include <math.h>
+#include <stdlib.h>
 #include <string.h>
 #ifdef HDF5
-#include <hdf5.h>
+  #include <hdf5.h>
 #endif
 #include "../global/global.h"
 #include "../grid/grid3D.h"
-#include "../hydro/hydro_cuda.h" // provides Calc_dt_GPU
+#include "../grid/grid_enum.h"    // provides grid_enum
+#include "../hydro/hydro_cuda.h"  // provides Calc_dt_GPU
 #include "../integrators/VL_1D_cuda.h"
 #include "../integrators/VL_2D_cuda.h"
 #include "../integrators/VL_3D_cuda.h"
-#include "../io/io.h"
-#include "../utils/error_handling.h"
-#include "../utils/ran.h"
 #include "../integrators/simple_1D_cuda.h"
 #include "../integrators/simple_2D_cuda.h"
 #include "../integrators/simple_3D_cuda.h"
+#include "../io/io.h"
+#include "../utils/error_handling.h"
 #ifdef MPI_CHOLLA
-#include <mpi.h>
-#ifdef HDF5
-#include <H5FDmpio.h>
-#endif
-#include "../mpi/mpi_routines.h"
+  #include <mpi.h>
+  #ifdef HDF5
+    #include <H5FDmpio.h>
+  #endif
+  #include "../mpi/mpi_routines.h"
 #endif
 #include <stdio.h>
 #ifdef CLOUDY_COOL
-#include "../cooling/load_cloudy_texture.h" // provides Load_Cuda_Textures and Free_Cuda_Textures
+  #include "../cooling/load_cloudy_texture.h"  // provides Load_Cuda_Textures and Free_Cuda_Textures
 #endif
 
 #ifdef PARALLEL_OMP
-#include "../utils/parallel_omp.h"
+  #include "../utils/parallel_omp.h"
 #endif
 
 #ifdef COOLING_GPU
-#include "../cooling/cooling_cuda.h" // provides Cooling_Update
+  #include "../cooling/cooling_cuda.h"  // provides Cooling_Update
 #endif
 
+#ifdef DUST
+  #include "../dust/dust_cuda.h"  // provides Dust_Update
+#endif
 
 /*! \fn Grid3D(void)
  *  \brief Constructor for the Grid. */
@@ -46,115 +49,137 @@ Grid3D::Grid3D(void)
   // set initialization flag to 0
   flag_init = 0;
 
-  // set number of ghost cells
-  #ifdef PCM
+// set number of ghost cells
+#ifdef PCM
   H.n_ghost = 2;
-  #endif //PCM
-  #ifdef PLMP
+#endif  // PCM
+#ifdef PLMP
   H.n_ghost = 3;
-  #endif //PLMP
-  #ifdef PLMC
+#endif  // PLMP
+#ifdef PLMC
   H.n_ghost = 3;
-  #endif //PLMC
-  #ifdef PPMP
+#endif  // PLMC
+#ifdef PPMP
+  H.n_ghost = 4;
+#endif  // PPMP
+#ifdef PPMC
   H.n_ghost = 4;
-  #endif //PPMP
-  #ifdef PPMC
-  H.n_ghost=4;
-  #endif //PPMC
+#endif  // PPMC
 
-  #ifdef GRAVITY
+#ifdef GRAVITY
   H.n_ghost_potential_offset = H.n_ghost - N_GHOST_POTENTIAL;
-  #endif
+#endif
 
+#ifdef MHD
+  // Set the number of ghost cells high enough for MHD. MHD needs one extra for the left most face
+  H.n_ghost++;
+#endif  // MHD
 }
 
-/*! \fn void Get_Position(long i, long j, long k, Real *xpos, Real *ypos, Real *zpos)
- *  \brief Get the cell-centered position based on cell index */
+/*! \fn void Get_Position(long i, long j, long k, Real *xpos, Real *ypos, Real
+ * *zpos) \brief Get the cell-centered position based on cell index */
 void Grid3D::Get_Position(long i, long j, long k, Real *x_pos, Real *y_pos, Real *z_pos)
 {
+#ifndef MPI_CHOLLA
 
-#ifndef   MPI_CHOLLA
-
-  *x_pos = H.xbound + H.dx*(i-H.n_ghost) + 0.5*H.dx;
-  *y_pos = H.ybound + H.dy*(j-H.n_ghost) + 0.5*H.dy;
-  *z_pos = H.zbound + H.dz*(k-H.n_ghost) + 0.5*H.dz;
+  *x_pos = H.xbound + H.dx * (i - H.n_ghost) + 0.5 * H.dx;
+  *y_pos = H.ybound + H.dy * (j - H.n_ghost) + 0.5 * H.dy;
+  *z_pos = H.zbound + H.dz * (k - H.n_ghost) + 0.5 * H.dz;
 
-#else   /*MPI_CHOLLA*/
+#else /*MPI_CHOLLA*/
 
   /* position relative to local xyz bounds */
-  /* This approach was replaced because it is less consistent for multiple cores.
-  Since distributive property does not perfectly hold for floating point operations
+  /* This approach was replaced because it is less consistent for multiple
+  cores. Since distributive property does not perfectly hold for floating point
+  operations
 
   > Global_bound + global_i * dx
 
   is more consistent than
 
-  >local_bound + local_i*dx = (global_bound + (global_i-local_i)*dx) + local_i*dx.
+  >local_bound + local_i*dx = (global_bound + (global_i-local_i)*dx) +
+  local_i*dx.
 
   *x_pos = H.xblocal + H.dx*(i-H.n_ghost) + 0.5*H.dx;
   *y_pos = H.yblocal + H.dy*(j-H.n_ghost) + 0.5*H.dy;
   *z_pos = H.zblocal + H.dz*(k-H.n_ghost) + 0.5*H.dz;
   */
 
-  *x_pos = H.xbound + (nx_local_start+i-H.n_ghost)*H.dx + 0.5*H.dx;
-  *y_pos = H.ybound + (ny_local_start+j-H.n_ghost)*H.dy + 0.5*H.dy;
-  *z_pos = H.zbound + (nz_local_start+k-H.n_ghost)*H.dz + 0.5*H.dz;
-
-#endif  /*MPI_CHOLLA*/
+  *x_pos = H.xbound + (nx_local_start + i - H.n_ghost) * H.dx + 0.5 * H.dx;
+  *y_pos = H.ybound + (ny_local_start + j - H.n_ghost) * H.dy + 0.5 * H.dy;
+  *z_pos = H.zbound + (nz_local_start + k - H.n_ghost) * H.dz + 0.5 * H.dz;
 
+#endif /*MPI_CHOLLA*/
 }
 
+Real Grid3D::Calc_Inverse_Timestep()
+{
+  // ==Calculate the next inverse time step using Calc_dt_GPU from
+  // hydro/hydro_cuda.h==
+  return Calc_dt_GPU(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_cells, H.dx, H.dy, H.dz, gama);
+}
 
 /*! \fn void Initialize(int nx_in, int ny_in, int nz_in)
  *  \brief Initialize the grid. */
-void Grid3D::Initialize(struct parameters *P)
+void Grid3D::Initialize(struct Parameters *P)
 {
   // number of fields to track (default 5 is # of conserved variables)
   H.n_fields = 5;
 
-  // if including passive scalars increase the number of fields
-  #ifdef SCALAR
+// if including passive scalars increase the number of fields
+#ifdef SCALAR
   H.n_fields += NSCALARS;
-  #endif
+#endif
 
-  // if including magnetic fields increase the number of fields
-  #ifdef  MHD
+// if including magnetic fields increase the number of fields
+#ifdef MHD
   H.n_fields += 3;
-  #endif  //MHD
+#endif  // MHD
 
-  // if using dual energy formalism must track internal energy - always the last field!
-  #ifdef DE
+// if using dual energy formalism must track internal energy - always the last
+// field!
+#ifdef DE
   H.n_fields++;
-  #endif
+#endif
 
   int nx_in = P->nx;
   int ny_in = P->ny;
   int nz_in = P->nz;
 
+#ifdef STATIC_GRAV
+  H.custom_grav = P->custom_grav;  // Initialize the custom static gravity flag
+  if (H.custom_grav == 0) {
+    printf("WARNING: No custom gravity field given. Gravity field will be set to zero.\n");
+  }
+#endif
+
   // Set the CFL coefficient (a global variable)
   C_cfl = 0.3;
-  
-  #ifdef AVERAGE_SLOW_CELLS
-  H.min_dt_slow = 1e-100; //Initialize the minumum dt to a tiny number
-  #endif
+
+#ifdef AVERAGE_SLOW_CELLS
+  H.min_dt_slow = 1e-100;  // Initialize the minumum dt to a tiny number
+#endif                     // AVERAGE_SLOW_CELLS
 
 #ifndef MPI_CHOLLA
 
   // set grid dimensions
-  H.nx = nx_in+2*H.n_ghost;
+  H.nx      = nx_in + 2 * H.n_ghost;
   H.nx_real = nx_in;
-  if (ny_in == 1) H.ny = 1;
-  else H.ny = ny_in+2*H.n_ghost;
+  if (ny_in == 1)
+    H.ny = 1;
+  else
+    H.ny = ny_in + 2 * H.n_ghost;
   H.ny_real = ny_in;
-  if (nz_in == 1) H.nz = 1;
-  else H.nz = nz_in+2*H.n_ghost;
+  if (nz_in == 1)
+    H.nz = 1;
+  else
+    H.nz = nz_in + 2 * H.n_ghost;
   H.nz_real = nz_in;
 
   // set total number of cells
   H.n_cells = H.nx * H.ny * H.nz;
 
-#else  /*MPI_CHOLLA*/
+#else /*MPI_CHOLLA*/
 
   /* perform domain decomposition
    * and set grid dimensions
@@ -164,20 +189,16 @@ void Grid3D::Initialize(struct parameters *P)
 #endif /*MPI_CHOLLA*/
 
   // failsafe
-  if(H.n_cells<=0)
-  {
+  if (H.n_cells <= 0) {
     chprintf("Error initializing grid: H.n_cells = %d\n", H.n_cells);
     chexit(-1);
   }
 
   // check for initialization
-  if(flag_init)
-  {
+  if (flag_init) {
     chprintf("Already initialized. Please reset.\n");
     return;
-  }
-  else
-  {
+  } else {
     // mark that we are initializing
     flag_init = 1;
   }
@@ -191,399 +212,398 @@ void Grid3D::Initialize(struct parameters *P)
   // and initialize the timestep
   H.dt = 0.0;
 
-  // Set Transfer flag to false, only set to true before Conserved boundaries are transferred
+  // Set Transfer flag to false, only set to true before Conserved boundaries
+  // are transferred
   H.TRANSFER_HYDRO_BOUNDARIES = false;
 
   // Set output to true when data has to be written to file;
   H.Output_Now = false;
 
-
   // allocate memory
   AllocateMemory();
 
-
 #ifdef ROTATED_PROJECTION
-  //x-dir pixels in projection
+  // x-dir pixels in projection
   R.nx = P->nxr;
-  //z-dir pixels in projection
+  // z-dir pixels in projection
   R.nz = P->nzr;
-  //minimum x location to project
+  // minimum x location to project
   R.nx_min = 0;
-  //minimum z location to project
+  // minimum z location to project
   R.nz_min = 0;
-  //maximum x location to project
+  // maximum x location to project
   R.nx_max = R.nx;
-  //maximum z location to project
+  // maximum z location to project
   R.nz_max = R.nz;
-  //rotation angle about z direction
-  R.delta = M_PI*(P->delta/180.); //convert to radians
-  //rotation angle about x direction
-  R.theta = M_PI*(P->theta/180.); //convert to radians
-  //rotation angle about y direction
-  R.phi = M_PI*(P->phi/180.); //convert to radians
-  //x-dir physical size of projection
+  // rotation angle about z direction
+  R.delta = M_PI * (P->delta / 180.);  // convert to radians
+  // rotation angle about x direction
+  R.theta = M_PI * (P->theta / 180.);  // convert to radians
+  // rotation angle about y direction
+  R.phi = M_PI * (P->phi / 180.);  // convert to radians
+  // x-dir physical size of projection
   R.Lx = P->Lx;
-  //z-dir physical size of projection
+  // z-dir physical size of projection
   R.Lz = P->Lz;
-  //initialize a counter for rotated outputs
+  // initialize a counter for rotated outputs
   R.i_delta = 0;
-  //number of rotated outputs in a complete revolution
+  // number of rotated outputs in a complete revolution
   R.n_delta = P->n_delta;
-  //rate of rotation between outputs, for an actual simulation
+  // rate of rotation between outputs, for an actual simulation
   R.ddelta_dt = P->ddelta_dt;
-  //are we not rotating about z(0)?
-  //are we outputting multiple rotations(1)? or rotating during a simulation(2)?
+  // are we not rotating about z(0)?
+  // are we outputting multiple rotations(1)? or rotating during a
+  // simulation(2)?
   R.flag_delta = P->flag_delta;
 #endif /*ROTATED_PROJECTION*/
 
-  // Values for lower limit for density and temperature
-  #ifdef DENSITY_FLOOR
-  H.density_floor = DENS_FLOOR;
-  #else
-  H.density_floor = 0.0;
-  #endif
+// Values for lower limit for density and temperature
+#ifdef TEMPERATURE_FLOOR
+  H.temperature_floor = P->temperature_floor;
+#endif
 
-  #ifdef TEMPERATURE_FLOOR
-  H.temperature_floor = TEMP_FLOOR;
-  #else
-  H.temperature_floor = 0.0;
-  #endif
+#ifdef DENSITY_FLOOR
+  H.density_floor = P->density_floor;
+#endif
 
-  #ifdef COSMOLOGY
-  if ( P->scale_outputs_file[0] == '\0' ) H.OUTPUT_SCALE_FACOR = false;
-  else H.OUTPUT_SCALE_FACOR = true;
-  #endif
+#ifdef SCALAR_FLOOR
+  H.scalar_floor = P->scalar_floor;
+#endif
 
-  H.Output_Initial = true;
+#ifdef COSMOLOGY
+  H.OUTPUT_SCALE_FACOR = not(P->scale_outputs_file[0] == '\0');
+#endif
 
+#ifdef SCALAR
+  #ifdef DUST
+  H.grain_radius = P->grain_radius;
+  #endif
+#endif
 
+  H.Output_Initial = true;
 }
 
-
 /*! \fn void AllocateMemory(void)
  *  \brief Allocate memory for the arrays. */
 void Grid3D::AllocateMemory(void)
 {
   // allocate memory for the conserved variable arrays
   // allocate all the memory to density, to insure contiguous memory
-  CudaSafeCall( cudaHostAlloc((void**)&C.host, H.n_fields*H.n_cells*sizeof(Real), cudaHostAllocDefault) );
+  GPU_Error_Check(cudaHostAlloc((void **)&C.host, H.n_fields * H.n_cells * sizeof(Real), cudaHostAllocDefault));
 
   // point conserved variables to the appropriate locations
-  C.density  = C.host;
-  C.momentum_x = &(C.host[H.n_cells]);
-  C.momentum_y = &(C.host[2*H.n_cells]);
-  C.momentum_z = &(C.host[3*H.n_cells]);
-  C.Energy   = &(C.host[4*H.n_cells]);
-  #ifdef SCALAR
-  C.scalar  = &(C.host[5*H.n_cells]);
-  #endif  //SCALAR
-  #ifdef  MHD
-  C.magnetic_x = &(C.host[(5 + NSCALARS)*H.n_cells]);
-  C.magnetic_y = &(C.host[(6 + NSCALARS)*H.n_cells]);
-  C.magnetic_z = &(C.host[(7 + NSCALARS)*H.n_cells]);
-  #endif  //MHD
-  #ifdef DE
-  C.GasEnergy = &(C.host[(H.n_fields-1)*H.n_cells]);
-  #endif  //DE
+  C.density    = &(C.host[grid_enum::density * H.n_cells]);
+  C.momentum_x = &(C.host[grid_enum::momentum_x * H.n_cells]);
+  C.momentum_y = &(C.host[grid_enum::momentum_y * H.n_cells]);
+  C.momentum_z = &(C.host[grid_enum::momentum_z * H.n_cells]);
+  C.Energy     = &(C.host[grid_enum::Energy * H.n_cells]);
+#ifdef SCALAR
+  C.scalar = &(C.host[H.n_cells * grid_enum::scalar]);
+  #ifdef BASIC_SCALAR
+  C.basic_scalar = &(C.host[H.n_cells * grid_enum::basic_scalar]);
+  #endif
+  #ifdef DUST
+  C.dust_density = &(C.host[H.n_cells * grid_enum::dust_density]);
+  #endif
+#endif  // SCALAR
+#ifdef MHD
+  C.magnetic_x = &(C.host[grid_enum::magnetic_x * H.n_cells]);
+  C.magnetic_y = &(C.host[grid_enum::magnetic_y * H.n_cells]);
+  C.magnetic_z = &(C.host[grid_enum::magnetic_z * H.n_cells]);
+#endif  // MHD
+#ifdef DE
+  C.GasEnergy = &(C.host[(H.n_fields - 1) * H.n_cells]);
+#endif  // DE
 
   // allocate memory for the conserved variable arrays on the device
-  CudaSafeCall( cudaMalloc((void**)&C.device, H.n_fields*H.n_cells*sizeof(Real)) );
+  GPU_Error_Check(cudaMalloc((void **)&C.device, H.n_fields * H.n_cells * sizeof(Real)));
+  cuda_utilities::initGpuMemory(C.device, H.n_fields * H.n_cells * sizeof(Real));
   C.d_density    = C.device;
   C.d_momentum_x = &(C.device[H.n_cells]);
-  C.d_momentum_y = &(C.device[2*H.n_cells]);
-  C.d_momentum_z = &(C.device[3*H.n_cells]);
-  C.d_Energy     = &(C.device[4*H.n_cells]);
-  #ifdef SCALAR
-  C.d_scalar     = &(C.device[5*H.n_cells]);
-  #endif  // SCALAR
-  #ifdef  MHD
-  C.d_magnetic_x   = &(C.device[(5 + NSCALARS)*H.n_cells]);
-  C.d_magnetic_y   = &(C.device[(6 + NSCALARS)*H.n_cells]);
-  C.d_magnetic_z   = &(C.device[(7 + NSCALARS)*H.n_cells]);
-  #endif  //MHD
-  #ifdef DE
-  C.d_GasEnergy  = &(C.device[(H.n_fields-1)*H.n_cells]);
-  #endif  // DE
-
-
-  // arrays that hold the max_dti calculation for hydro for each thread block (pre reduction)
-  int ngrid = (H.n_cells + TPB - 1) / TPB;
-  CudaSafeCall( cudaHostAlloc(&host_dti_array, ngrid*sizeof(Real), cudaHostAllocDefault) );
-  CudaSafeCall( cudaMalloc((void**)&dev_dti_array, ngrid*sizeof(Real)) );
-  CudaSafeCall( cudaMalloc((void**)&dev_dti, sizeof(Real)) );
-
-
-  #if defined( GRAVITY )
-  CudaSafeCall( cudaHostAlloc(&C.Grav_potential, H.n_cells*sizeof(Real), cudaHostAllocDefault) );
-  CudaSafeCall( cudaMalloc((void**)&C.d_Grav_potential, H.n_cells*sizeof(Real)) );
-  #else
+  C.d_momentum_y = &(C.device[2 * H.n_cells]);
+  C.d_momentum_z = &(C.device[3 * H.n_cells]);
+  C.d_Energy     = &(C.device[4 * H.n_cells]);
+#ifdef SCALAR
+  C.d_scalar = &(C.device[H.n_cells * grid_enum::scalar]);
+  #ifdef BASIC_SCALAR
+  C.d_basic_scalar = &(C.device[H.n_cells * grid_enum::basic_scalar]);
+  #endif
+  #ifdef DUST
+  C.d_dust_density = &(C.device[H.n_cells * grid_enum::dust_density]);
+  #endif
+#endif  // SCALAR
+#ifdef MHD
+  C.d_magnetic_x = &(C.device[(grid_enum::magnetic_x)*H.n_cells]);
+  C.d_magnetic_y = &(C.device[(grid_enum::magnetic_y)*H.n_cells]);
+  C.d_magnetic_z = &(C.device[(grid_enum::magnetic_z)*H.n_cells]);
+#endif  // MHD
+#ifdef DE
+  C.d_GasEnergy = &(C.device[(H.n_fields - 1) * H.n_cells]);
+#endif  // DE
+
+#if defined(GRAVITY)
+  GPU_Error_Check(cudaHostAlloc(&C.Grav_potential, H.n_cells * sizeof(Real), cudaHostAllocDefault));
+  GPU_Error_Check(cudaMalloc((void **)&C.d_Grav_potential, H.n_cells * sizeof(Real)));
+#else
   C.Grav_potential   = NULL;
   C.d_Grav_potential = NULL;
-  #endif
-
+#endif
 
-  #ifdef CHEMISTRY_GPU
-  C.HI_density    = &C.scalar[ 0*H.n_cells ];
-  C.HII_density   = &C.scalar[ 1*H.n_cells ];
-  C.HeI_density   = &C.scalar[ 2*H.n_cells ];
-  C.HeII_density  = &C.scalar[ 3*H.n_cells ];
-  C.HeIII_density = &C.scalar[ 4*H.n_cells ];
-  C.e_density     = &C.scalar[ 5*H.n_cells ];
-  #endif
+#ifdef CHEMISTRY_GPU
+  C.HI_density    = &C.host[H.n_cells * grid_enum::HI_density];
+  C.HII_density   = &C.host[H.n_cells * grid_enum::HII_density];
+  C.HeI_density   = &C.host[H.n_cells * grid_enum::HeI_density];
+  C.HeII_density  = &C.host[H.n_cells * grid_enum::HeII_density];
+  C.HeIII_density = &C.host[H.n_cells * grid_enum::HeIII_density];
+  C.e_density     = &C.host[H.n_cells * grid_enum::e_density];
+#endif
 
   // initialize host array
-  for (int i=0; i<H.n_fields*H.n_cells; i++)
-  {
+  for (int i = 0; i < H.n_fields * H.n_cells; i++) {
     C.host[i] = 0.0;
   }
 
-  #ifdef CLOUDY_COOL
+#ifdef CLOUDY_COOL
   Load_Cuda_Textures();
-  #endif  // CLOUDY_COOL
-
+#endif  // CLOUDY_COOL
 }
 
-
 /*! \fn void set_dt(Real dti)
  *  \brief Set the timestep. */
- void Grid3D::set_dt(Real dti)
+void Grid3D::set_dt(Real dti)
 {
   Real max_dti;
 
-  #ifdef CPU_TIME
+#ifdef CPU_TIME
   Timer.Calc_dt.Start();
-  #endif
+#endif
 
-  #ifdef ONLY_PARTICLES
+#ifdef ONLY_PARTICLES
   // If only solving particles the time for hydro is set to a  large value,
   // that way the minimum dt is the one corresponding to particles
   H.dt = 1e10;
 
-  #else //NOT ONLY_PARTICLES
+#else  // NOT ONLY_PARTICLES
 
-  //Compute the hydro delta_t ( H.dt )
-  if (H.n_step == 0)
-  {
-    // Compute the time step
-    max_dti = Calc_dt_GPU(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_cells, H.dx, H.dy, H.dz, gama );
-  }
-  else {
-    max_dti = dti;
-  }
+  // dti is calculated before first loop and at the end of Update_Grid
+  max_dti = dti;
 
   #ifdef MPI_CHOLLA
-    // Note that this is the MPI_Allreduce for every iteration of the loop, not
-    // just the first one
-    max_dti = ReduceRealMax(max_dti);
+  // Note that this is the MPI_Allreduce for every iteration of the loop, not
+  // just the first one
+  max_dti = ReduceRealMax(max_dti);
   #endif /*MPI_CHOLLA*/
 
-
   H.dt = C_cfl / max_dti;
 
-  #endif //ONLY_PARTICLES
+#endif  // ONLY_PARTICLES
 
-  #ifdef GRAVITY
-  //Set dt for hydro and particles
+#ifdef GRAVITY
+  // Set dt for hydro and particles
   set_dt_Gravity();
-  #endif  //GRAVITY
+#endif  // GRAVITY
 
-  #ifdef CPU_TIME
+#ifdef CPU_TIME
   Timer.Calc_dt.End();
-  #endif
-
-
+#endif
 }
 
-/*! \fn void Update_Grid(void)
- *  \brief Update the conserved quantities in each cell. */
-Real Grid3D::Update_Grid(void)
+/*! \fn void Execute_Hydro_Integratore_Grid(void)
+ *  \brief Updates cells by executing the hydro integrator. */
+void Grid3D::Execute_Hydro_Integrator(void)
 {
-
   Real max_dti = 0;
   int x_off, y_off, z_off;
 
   // set x, y, & z offsets of local CPU volume to pass to GPU
   // so global position on the grid is known
   x_off = y_off = z_off = 0;
-  #ifdef MPI_CHOLLA
+#ifdef MPI_CHOLLA
   x_off = nx_local_start;
   y_off = ny_local_start;
   z_off = nz_local_start;
-  #endif
-
-  // Set the lower limit for density and temperature (Internal Energy)
-  Real U_floor, density_floor;
-  density_floor = H.density_floor;
-  // Minimum of internal energy from minumum of temperature
-  U_floor = H.temperature_floor * KB / (gama - 1) / MP / SP_ENERGY_UNIT;
-  #ifdef COSMOLOGY
-  U_floor = H.temperature_floor / (gama - 1) / MP * KB * 1e-10; // ( km/s )^2
-  U_floor /=  Cosmo.v_0_gas * Cosmo.v_0_gas / Cosmo.current_a / Cosmo.current_a;
-  #endif
+#endif
 
+#ifdef CPU_TIME
+  Timer.Hydro_Integrator.Start();
+#endif  // CPU_TIME
 
   // Run the hydro integrator on the grid
-  if (H.nx > 1 && H.ny == 1 && H.nz == 1) //1D
+  if (H.nx > 1 && H.ny == 1 && H.nz == 1)  // 1D
   {
-    #ifdef CUDA
-    #ifdef VL
-    VL_Algorithm_1D_CUDA(C.device, H.nx, x_off, H.n_ghost, H.dx, H.xbound, H.dt, H.n_fields);
-    #endif //VL
-    #ifdef SIMPLE
-    Simple_Algorithm_1D_CUDA(C.device, H.nx, x_off, H.n_ghost, H.dx, H.xbound, H.dt, H.n_fields);
-    #endif //SIMPLE
-    #endif //CUDA
-  }
-  else if (H.nx > 1 && H.ny > 1 && H.nz == 1) //2D
-  {
-    #ifdef CUDA
-    #ifdef VL
-    VL_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, H.dx, H.dy, H.xbound, H.ybound, H.dt, H.n_fields);
-    #endif //VL
-    #ifdef SIMPLE 
-    Simple_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, H.dx, H.dy, H.xbound, H.ybound, H.dt, H.n_fields);
-    #endif //SIMPLE
-    #endif //CUDA
-  }
-  else if (H.nx > 1 && H.ny > 1 && H.nz > 1) //3D
+#ifdef VL
+    VL_Algorithm_1D_CUDA(C.device, H.nx, x_off, H.n_ghost, H.dx, H.xbound, H.dt, H.n_fields, H.custom_grav);
+#endif  // VL
+#ifdef SIMPLE
+    Simple_Algorithm_1D_CUDA(C.device, H.nx, x_off, H.n_ghost, H.dx, H.xbound, H.dt, H.n_fields, H.custom_grav);
+#endif                                           // SIMPLE
+  } else if (H.nx > 1 && H.ny > 1 && H.nz == 1)  // 2D
   {
-    #ifdef CUDA
-    #ifdef VL
-    VL_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, density_floor, U_floor, C.Grav_potential );
-    #endif //VL
-    #ifdef SIMPLE
-    Simple_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, density_floor, U_floor, C.Grav_potential );
-    #endif//SIMPLE
-    #endif
-  }
-  else
+#ifdef VL
+    VL_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, H.dx, H.dy, H.xbound, H.ybound, H.dt,
+                         H.n_fields, H.custom_grav);
+#endif  // VL
+#ifdef SIMPLE
+    Simple_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, H.dx, H.dy, H.xbound, H.ybound, H.dt,
+                             H.n_fields, H.custom_grav);
+#endif                                          // SIMPLE
+  } else if (H.nx > 1 && H.ny > 1 && H.nz > 1)  // 3D
   {
+#ifdef VL
+    VL_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy,
+                         H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, H.custom_grav, H.density_floor,
+                         C.Grav_potential);
+#endif  // VL
+#ifdef SIMPLE
+    Simple_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy,
+                             H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, H.custom_grav, H.density_floor,
+                             C.Grav_potential);
+#endif  // SIMPLE
+  } else {
     chprintf("Error: Grid dimensions nx: %d  ny: %d  nz: %d  not supported.\n", H.nx, H.ny, H.nz);
     chexit(-1);
   }
 
-
-  #ifdef CUDA
-
-  #ifdef COOLING_GPU
-  // ==Apply Cooling from cooling/cooling_cuda.h==
-  Cooling_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama);
-  #endif //COOLING_GPU
-
-  // Update the H and He ionization fractions and apply cooling and photoheating
-  #ifdef CHEMISTRY_GPU
-  Update_Chemistry();
-  #ifdef CPU_TIME
-  Timer.Chemistry.RecordTime( Chem.H.runtime_chemistry_step );
-  #endif
-  #endif
-  
-  #ifdef AVERAGE_SLOW_CELLS
-  //Set the min_delta_t for averaging a slow cell
-  Real max_dti_slow;
-  max_dti_slow = 1 / H.min_dt_slow;
-  Average_Slow_Cells( C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dx, H.dy, H.dz, gama, max_dti_slow );
-  #endif //AVERAGE_SLOW_CELLS
-
-  // ==Calculate the next time step with Calc_dt_GPU from hydro/hydro_cuda.h==
-  max_dti = Calc_dt_GPU(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_cells, H.dx, H.dy, H.dz, gama );
-  #endif // CUDA
-
-  #ifdef COOLING_GRACKLE
-  Cool.fields.density = C.density;
-  Cool.fields.HI_density      = &C.scalar[ 0*H.n_cells ];
-  Cool.fields.HII_density     = &C.scalar[ 1*H.n_cells ];
-  Cool.fields.HeI_density     = &C.scalar[ 2*H.n_cells ];
-  Cool.fields.HeII_density    = &C.scalar[ 3*H.n_cells ];
-  Cool.fields.HeIII_density   = &C.scalar[ 4*H.n_cells ];
-  Cool.fields.e_density       = &C.scalar[ 5*H.n_cells ];
-  #ifdef GRACKLE_METALS
-  Cool.fields.metal_density   = &C.scalar[ 6*H.n_cells ];
-  #endif
-  #endif
-
-  #ifdef CHEMISTRY_GPU
-  C.HI_density    = &C.scalar[ 0*H.n_cells ];
-  C.HII_density   = &C.scalar[ 1*H.n_cells ];
-  C.HeI_density   = &C.scalar[ 2*H.n_cells ];
-  C.HeII_density  = &C.scalar[ 3*H.n_cells ];
-  C.HeIII_density = &C.scalar[ 4*H.n_cells ];
-  C.e_density     = &C.scalar[ 5*H.n_cells ];
-  #endif
-
-
-  return max_dti;
-
+#ifdef CPU_TIME
+  Timer.Hydro_Integrator.End(true);
+#endif  // CPU_TIME
 }
 
 /*! \fn void Update_Hydro_Grid(void)
  *  \brief Do all steps to update the hydro. */
-Real Grid3D::Update_Hydro_Grid( ){
-
-  #ifdef ONLY_PARTICLES
+Real Grid3D::Update_Hydro_Grid()
+{
+#ifdef ONLY_PARTICLES
   // Don't integrate the Hydro when only solving for particles
   return 1e-10;
-  #endif
+#endif  // ONLY_PARTICLES
 
-  Real dti;
-
-  #ifdef CPU_TIME
+#ifdef CPU_TIME
   Timer.Hydro.Start();
-  #endif //CPU_TIME
+  double non_hydro_elapsed_time = 0.0;
+#endif  // CPU_TIME
 
-  #ifdef GRAVITY
+#ifdef GRAVITY
   // Extrapolate gravitational potential for hydro step
   Extrapolate_Grav_Potential();
+#endif  // GRAVITY
+
+  Execute_Hydro_Integrator();
+
+#ifdef TEMPERATURE_FLOOR
+  // Set the lower limit temperature (Internal Energy)
+  Real U_floor;
+  // Minimum of internal energy from minumum of temperature
+  U_floor = H.temperature_floor * KB / (gama - 1) / MP / SP_ENERGY_UNIT;
+  #ifdef COSMOLOGY
+  U_floor = H.temperature_floor / (gama - 1) / MP * KB * 1e-10;  // ( km/s )^2
+  U_floor /= Cosmo.v_0_gas * Cosmo.v_0_gas / Cosmo.current_a / Cosmo.current_a;
   #endif
+  Apply_Temperature_Floor(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, U_floor);
+#endif  // TEMPERATURE_FLOOR
 
-  dti = Update_Grid();
+#ifdef SCALAR_FLOOR
+  #ifdef DUST
+  Apply_Scalar_Floor(C.device, H.nx, H.ny, H.nz, H.n_ghost, grid_enum::dust_density, H.scalar_floor);
+  #endif
+#endif  // SCALAR_FLOOR
 
+// == Perform chemistry/cooling (there are a few different cases) ==
+#ifdef COOLING_GPU
   #ifdef CPU_TIME
-  #ifdef CHEMISTRY_GPU
-  Timer.Hydro.Subtract(Chem.H.runtime_chemistry_step);
-  //Subtract the time spent on the Chemical Update 
+  Timer.Cooling_GPU.Start();
   #endif
-  Timer.Hydro.End();
-  #endif //CPU_TIME
-
-  #ifdef COOLING_GRACKLE
+  // ==Apply Cooling from cooling/cooling_cuda.h==
+  Cooling_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama);
   #ifdef CPU_TIME
-  Timer.Cooling.Start();
+  Timer.Cooling_GPU.End();
   #endif
-  Do_Cooling_Step_Grackle( );
+
+#endif  // COOLING_GPU
+
+#ifdef DUST
+  // ==Apply dust from dust/dust_cuda.h==
+  Dust_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama, H.grain_radius);
+#endif  // DUST
+
+#ifdef CHEMISTRY_GPU
+  // Update the H and He ionization fractions and apply cooling and photoheating
+  Update_Chemistry();
   #ifdef CPU_TIME
-  Timer.Cooling.End();
+  Timer.Chemistry.RecordTime(Chem.H.runtime_chemistry_step);
+  non_hydro_elapsed_time += Chem.H.runtime_chemistry_step;
+  #endif
+  C.HI_density    = &C.host[H.n_cells * grid_enum::HI_density];
+  C.HII_density   = &C.host[H.n_cells * grid_enum::HII_density];
+  C.HeI_density   = &C.host[H.n_cells * grid_enum::HeI_density];
+  C.HeII_density  = &C.host[H.n_cells * grid_enum::HeII_density];
+  C.HeIII_density = &C.host[H.n_cells * grid_enum::HeIII_density];
+  C.e_density     = &C.host[H.n_cells * grid_enum::e_density];
+#endif
+
+#ifdef COOLING_GRACKLE
+  Cool.fields.density       = C.density;
+  Cool.fields.HI_density    = &C.host[H.n_cells * grid_enum::HI_density];
+  Cool.fields.HII_density   = &C.host[H.n_cells * grid_enum::HII_density];
+  Cool.fields.HeI_density   = &C.host[H.n_cells * grid_enum::HeI_density];
+  Cool.fields.HeII_density  = &C.host[H.n_cells * grid_enum::HeII_density];
+  Cool.fields.HeIII_density = &C.host[H.n_cells * grid_enum::HeIII_density];
+  Cool.fields.e_density     = &C.host[H.n_cells * grid_enum::e_density];
+
+  #ifdef GRACKLE_METALS
+  Cool.fields.metal_density = &C.host[H.n_cells * grid_enum::metal_density];
   #endif
-  #endif//COOLING_GRACKLE
 
+  #ifdef CPU_TIME
+  double cur_grackle_timing = Get_Time();
+  #endif  // CPU_TIME
+  Do_Cooling_Step_Grackle();
+  #ifdef CPU_TIME
+  double cur_grackle_timing = Get_Time() - cur_grackle_timing;
+  Timer.Cooling_Grackle.RecordTime(cur_grackle_timing);
+  non_hydro_elapsed_time += cur_grackle_timing;
+  #endif  // CPU_TIME
+#endif    // COOLING_GRACKLE
+
+  // == average slow cells and compute the new timestep ==
+#ifdef AVERAGE_SLOW_CELLS
+  // Set the min_delta_t for averaging a slow cell
+  Real max_dti_slow;
+  max_dti_slow = 1 / H.min_dt_slow;
+  Average_Slow_Cells(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dx, H.dy, H.dz, gama, max_dti_slow);
+#endif  // AVERAGE_SLOW_CELLS
+
+  // ==Calculate the next time step using Calc_dt_GPU from hydro/hydro_cuda.h==
+  Real dti = Calc_Inverse_Timestep();
+
+#ifdef CPU_TIME
+  Timer.Hydro.Subtract(non_hydro_elapsed_time);
+  Timer.Hydro.End();
+#endif  // CPU_TIME
 
   return dti;
 }
 
-void Grid3D::Update_Time(){
-
+void Grid3D::Update_Time()
+{
   // update the time
   H.t += H.dt;
 
-  #ifdef PARTICLES
+#ifdef PARTICLES
   Particles.t = H.t;
 
   #ifdef COSMOLOGY
   Cosmo.current_a += Cosmo.delta_a;
-  Cosmo.current_z = 1./Cosmo.current_a - 1;
+  Cosmo.current_z     = 1. / Cosmo.current_a - 1;
   Particles.current_a = Cosmo.current_a;
   Particles.current_z = Cosmo.current_z;
-  Grav.current_a = Cosmo.current_a;
-  #endif //COSMOLOGY
-  #endif //PARTICLES
+  Grav.current_a      = Cosmo.current_a;
+  #endif  // COSMOLOGY
+#endif    // PARTICLES
 
-  #if defined(ANALYSIS) && defined(COSMOLOGY)
+#if defined(ANALYSIS) && defined(COSMOLOGY)
   Analysis.current_z = Cosmo.current_z;
-  #endif
-
-
-
-
+#endif
 }
 
 /*! \fn void Reset(void)
@@ -595,65 +615,70 @@ void Grid3D::Reset(void)
 
   // reset the initialization flag
   flag_init = 0;
-
 }
 
-
 /*! \fn void FreeMemory(void)
  *  \brief Free the memory allocated by the Grid3D class. */
 void Grid3D::FreeMemory(void)
 {
   // free the conserved variable arrays
-  CudaSafeCall( cudaFreeHost(C.host) );
+  GPU_Error_Check(cudaFreeHost(C.host));
 
-  // free the timestep arrays
-  CudaSafeCall( cudaFreeHost(host_dti_array) );
-  cudaFree(dev_dti_array);
-  cudaFree(dev_dti);
+#ifdef GRAVITY
+  GPU_Error_Check(cudaFreeHost(C.Grav_potential));
+  GPU_Error_Check(cudaFree(C.d_Grav_potential));
+#endif
 
-  #ifdef GRAVITY
-  CudaSafeCall( cudaFreeHost(C.Grav_potential) );
-  CudaSafeCall( cudaFree(C.d_Grav_potential) );
-  #endif
+// If memory is single allocated, free the memory at the end of the simulation.
+#ifdef VL
+  if (H.nx > 1 && H.ny == 1 && H.nz == 1) {
+    Free_Memory_VL_1D();
+  }
+  if (H.nx > 1 && H.ny > 1 && H.nz == 1) {
+    Free_Memory_VL_2D();
+  }
+  if (H.nx > 1 && H.ny > 1 && H.nz > 1) {
+    Free_Memory_VL_3D();
+  }
+#endif  // VL
+#ifdef SIMPLE
+  if (H.nx > 1 && H.ny == 1 && H.nz == 1) {
+    Free_Memory_Simple_1D();
+  }
+  if (H.nx > 1 && H.ny > 1 && H.nz == 1) {
+    Free_Memory_Simple_2D();
+  }
+  if (H.nx > 1 && H.ny > 1 && H.nz > 1) {
+    Free_Memory_Simple_3D();
+  }
+#endif  // SIMPLE
 
-  // If memory is single allocated, free the memory at the end of the simulation.
-  #ifdef VL
-  if (H.nx > 1 && H.ny == 1 && H.nz == 1) Free_Memory_VL_1D();
-  if (H.nx > 1 && H.ny > 1 && H.nz == 1) Free_Memory_VL_2D();
-  if (H.nx > 1 && H.ny > 1 && H.nz > 1) Free_Memory_VL_3D();
-  #endif // VL
-  #ifdef SIMPLE
-  if (H.nx > 1 && H.ny == 1 && H.nz == 1) Free_Memory_Simple_1D();
-  if (H.nx > 1 && H.ny > 1 && H.nz == 1) Free_Memory_Simple_2D();
-  if (H.nx > 1 && H.ny > 1 && H.nz > 1) Free_Memory_Simple_3D();
-  #endif // SIMPLE
-
-  #ifdef GRAVITY
+#ifdef GRAVITY
   Grav.FreeMemory_CPU();
   #ifdef GRAVITY_GPU
   Grav.FreeMemory_GPU();
   #endif
-  #endif
+#endif
 
-  #ifdef PARTICLES
+#ifdef PARTICLES
   Particles.Reset();
-  #endif
+#endif
 
-  #ifdef COOLING_GRACKLE
+#ifdef COOLING_GRACKLE
   Cool.Free_Memory();
-  #endif
+#endif
 
-  #ifdef COOLING_GPU
+#ifdef COOLING_GPU
   #ifdef CLOUDY_COOL
   Free_Cuda_Textures();
   #endif
-  #endif
+#endif
 
-  #ifdef CHEMISTRY_GPU
+#ifdef CHEMISTRY_GPU
   Chem.Reset();
-  #endif
+#endif
 
-  #ifdef ANALYSIS
+#ifdef ANALYSIS
   Analysis.Reset();
-  #endif
+#endif
 }
diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h
index ec48c27be..e248f6490 100644
--- a/src/grid/grid3D.h
+++ b/src/grid/grid3D.h
@@ -4,105 +4,104 @@
 #ifndef GRID3D_H
 #define GRID3D_H
 
-#ifdef   MPI_CHOLLA
-#include "../mpi/mpi_routines.h"
+#ifdef MPI_CHOLLA
+  #include "../mpi/mpi_routines.h"
 #endif /*MPI_CHOLLA*/
 
 #include <stdio.h>
+
 #include "../global/global.h"
 #include "../global/global_cuda.h"
 
 #ifdef HDF5
-#include <hdf5.h>
+  #include <hdf5.h>
 #endif
 
 #ifdef GRAVITY
-#include "../gravity/grav3D.h"
+  #include "../gravity/grav3D.h"
 #endif
 
 #ifdef PARTICLES
-#include "../particles/particles_3D.h"
+  #include "../particles/particles_3D.h"
 #endif
 
 #include "../model/disk_galaxy.h"
 
 #ifdef COSMOLOGY
-#include "../cosmology/cosmology.h"
+  #include "../cosmology/cosmology.h"
 #endif
 
 #ifdef COOLING_GRACKLE
-#include "../cooling_grackle/cool_grackle.h"
+  #include "../cooling_grackle/cool_grackle.h"
 #endif
 
 #ifdef CPU_TIME
-#include "../utils/timing_functions.h"
+  #include "../utils/timing_functions.h"
 #endif
 
 #ifdef CHEMISTRY_GPU
-#include "chemistry_gpu/chemistry_gpu.h"
+  #include "chemistry_gpu/chemistry_gpu.h"
 #endif
 
 #ifdef ANALYSIS
-#include "../analysis/analysis.h"
+  #include "../analysis/analysis.h"
 #endif
 
-
-struct Rotation
-{
+struct Rotation {
   /*! \var nx
-  *   \brief Number of pixels in x-dir of rotated, projected image*/
+   *   \brief Number of pixels in x-dir of rotated, projected image*/
   int nx;
 
   /*! \var nz
-  *   \brief Number of pixels in z-dir of rotated, projected image*/
+   *   \brief Number of pixels in z-dir of rotated, projected image*/
   int nz;
 
   /*! \var nx_min
-  *   \brief Left most point in the projected image for this subvolume*/
+   *   \brief Left most point in the projected image for this subvolume*/
   int nx_min;
 
   /*! \var nx_max
-  *   \brief Right most point in the projected image for this subvolume*/
+   *   \brief Right most point in the projected image for this subvolume*/
   int nx_max;
 
   /*! \var nz_min
-  *   \brief Bottom most point in the projected image for this subvolume*/
+   *   \brief Bottom most point in the projected image for this subvolume*/
   int nz_min;
 
   /*! \var nz_max
-  *   \brief Top most point in the projected image for this subvolume*/
+   *   \brief Top most point in the projected image for this subvolume*/
   int nz_max;
 
   /*! \var delta
-  *   \brief Rotation angle about z axis in simulation frame*/
+   *   \brief Rotation angle about z axis in simulation frame*/
   Real delta;
 
   /*! \var theta
-  *   \brief Rotation angle about x axis in simulation frame*/
+   *   \brief Rotation angle about x axis in simulation frame*/
   Real theta;
 
   /*! \var phi
-  *   \brief Rotation angle about y axis in simulation frame*/
+   *   \brief Rotation angle about y axis in simulation frame*/
   Real phi;
 
   /*! \var Lx
-  *   \brief Physical x-dir size of projected image*/
+   *   \brief Physical x-dir size of projected image*/
   Real Lx;
 
   /*! \var Lz
-  *   \brief Physical z-dir size of projected image*/
+   *   \brief Physical z-dir size of projected image*/
   Real Lz;
 
   /*! \var i_delta
-  *   \brief number of output projection for delta rotation*/
+   *   \brief number of output projection for delta rotation*/
   int i_delta;
 
   /*! \var n_delta
-  *   \brief total number of output projection for delta rotation*/
+   *   \brief total number of output projection for delta rotation*/
   Real n_delta;
 
   /*! \var ddelta_dt
-  *   \brief rate of delta rotation*/
+   *   \brief rate of delta rotation*/
   Real ddelta_dt;
 
   /*! \var flag_delta
@@ -110,38 +109,37 @@ struct Rotation
   int flag_delta;
 };
 
-struct Header
-{
+struct Header {
   /*! \var n_cells
-  *  \brief Total number of cells in the grid (including ghost cells) */
+   *  \brief Total number of cells in the grid (including ghost cells) */
   int n_cells;
 
   /*! \var n_ghost
-  *  \brief Number of ghost cells on each side of the grid */
+   *  \brief Number of ghost cells on each side of the grid */
   int n_ghost;
 
   /*! \var nx
-  *  \brief Total number of cells in the x-dimension */
+   *  \brief Total number of cells in the x-dimension */
   int nx;
 
   /*! \var ny
-  *  \brief Total number of cells in the y-dimension */
+   *  \brief Total number of cells in the y-dimension */
   int ny;
 
   /*! \var nz
-  *  \brief Total number of cells in the z-dimension */
+   *  \brief Total number of cells in the z-dimension */
   int nz;
 
   /*! \var nx_real
-  *  \brief Number of real cells in the x-dimension */
+   *  \brief Number of real cells in the x-dimension */
   int nx_real;
 
   /*! \var ny
-  *  \brief Number of real cells in the y-dimension */
+   *  \brief Number of real cells in the y-dimension */
   int ny_real;
 
   /*! \var nz
-  *  \brief Number of real cells in the z-dimension */
+   *  \brief Number of real cells in the z-dimension */
   int nz_real;
 
   /*! \var xbound */
@@ -156,7 +154,7 @@ struct Header
   /*  \brief Global domain z-direction minimum */
   Real zbound;
 
-   /*! \var xblocal */
+  /*! \var xblocal */
   /*  \brief Local domain x-direction minimum */
   Real xblocal;
 
@@ -193,51 +191,56 @@ struct Header
   Real zdglobal;
 
   /*! \var dx
-  *  \brief x-width of cells */
+   *  \brief x-width of cells */
   Real dx;
 
   /*! \var dy
-  *  \brief y-width of cells */
+   *  \brief y-width of cells */
   Real dy;
 
   /*! \var dz
-  *  \brief z-width of cells */
+   *  \brief z-width of cells */
   Real dz;
 
   /*! \var t
-  *  \brief Simulation time */
+   *  \brief Simulation time */
   Real t;
 
   /*! \var dt
-  *  \brief Length of the current timestep */
+   *  \brief Length of the current timestep */
   Real dt;
 
-  #ifdef AVERAGE_SLOW_CELLS
+#ifdef AVERAGE_SLOW_CELLS
   Real min_dt_slow;
-  #endif
+#endif
 
   /*! \var t_wall
-  *  \brief Wall time */
+   *  \brief Wall time */
   Real t_wall;
 
   /*! \var n_step
-  *  \brief Number of timesteps taken */
+   *  \brief Number of timesteps taken */
   int n_step;
 
   /*! \var n_fields
-  *  \brief Number of fields (conserved variables, scalars, etc.) */
+   *  \brief Number of fields (conserved variables, scalars, etc.) */
   int n_fields;
 
+  /*! \var custom_grav
+   *  \brief Flag to set specific static gravity field */
+  int custom_grav;
+
   // Values for lower limit for density and temperature
-  Real density_floor;
   Real temperature_floor;
+  Real density_floor;
+  Real scalar_floor;
 
   Real Ekin_avrg;
 
-  //Flag to indicate when to transfer the Conserved boundaries
+  // Flag to indicate when to transfer the Conserved boundaries
   bool TRANSFER_HYDRO_BOUNDARIES;
 
-  //Parameters For Spherical Colapse Problem
+  // Parameters For Spherical Colapse Problem
   Real sphere_density;
   Real sphere_radius;
   Real sphere_background_density;
@@ -245,607 +248,677 @@ struct Header
   Real sphere_center_y;
   Real sphere_center_z;
 
-
-  #ifdef GRAVITY
+#ifdef GRAVITY
   /*! \var n_ghost_potential_offset
-  *  \brief Number of offset betewen hydro_ghost_cells and potential_ghost_cells */
+   *  \brief Number of offset betewen hydro_ghost_cells and
+   * potential_ghost_cells */
   int n_ghost_potential_offset;
-  #endif
+#endif
 
-  #ifdef COSMOLOGY
+#ifdef COSMOLOGY
   bool OUTPUT_SCALE_FACOR;
-  #endif
+#endif
 
   /*! \var Output_Now
-  *  \brief Flag set to true when data has to be written to file */
+   *  \brief Flag set to true when data has to be written to file */
   bool Output_Now;
   bool Output_Initial;
 
   /*! \var Output_Complete_Data
-  *  \brief Flag set to true when all the data will  be written to file (Restart File ) */
+   *  \brief Flag set to true when all the data will  be written to file
+   * (Restart File ) */
   bool Output_Complete_Data;
 
-
+#ifdef SCALAR
+  #ifdef DUST
+  Real grain_radius;
+  #endif
+#endif
 };
 
 /*! \class Grid3D
  *  \brief Class to create a 3D grid of cells. */
 class Grid3D
 {
-  public:
+ public:
+  /*! \var flag_init
+   *  \brief Initialization flag */
+  int flag_init;
 
-    /*! \var flag_init
-     *  \brief Initialization flag */
-    int flag_init;
+  /*! \var struct Header H
+   *  \brief Header for the grid */
+  struct Header H;
 
-    /*! \var struct Header H
-     *  \brief Header for the grid */
-    struct Header H;
+  /*! \var struct Rotation R
+   *  \brief Rotation struct for data projections */
+  struct Rotation R;
 
-    /*! \var struct Rotation R
-     *  \brief Rotation struct for data projections */
-    struct Rotation R;
+#ifdef GRAVITY
+  // Object that contains data for gravity
+  Grav3D Grav;
+#endif
 
-    #ifdef GRAVITY
-    // Object that contains data for gravity
-    Grav3D Grav;
-    #endif
+#ifdef PARTICLES
+  // Object that contains data for particles
+  Particles3D Particles;
+#endif
 
-    #ifdef PARTICLES
-    // Object that contains data for particles
-    Particles_3D Particles;
-    #endif
+#ifdef COSMOLOGY
+  // Object that contains data for cosmology
+  Cosmology Cosmo;
+#endif
 
-    #ifdef COSMOLOGY
-    // Object that contains data for cosmology
-    Cosmology Cosmo;
-    #endif
+#ifdef COOLING_GRACKLE
+  // Object that contains data for Grackle cooling
+  Cool_GK Cool;
+#endif
 
-    #ifdef COOLING_GRACKLE
-    // Object that contains data for Grackle cooling
-    Cool_GK Cool;
-    #endif
+#ifdef CPU_TIME
+  Time Timer;
+#endif
 
-    #ifdef CPU_TIME
-    Time Timer;
-    #endif
+#ifdef CHEMISTRY_GPU
+  // Object that contains data for the GPU chemistry solver
+  Chem_GPU Chem;
+#endif
 
-    #ifdef CHEMISTRY_GPU
-    // Object that contains data for the GPU chemistry solver
-    Chem_GPU Chem;
-    #endif
+#ifdef ANALYSIS
+  AnalysisModule Analysis;
+#endif
 
-    #ifdef ANALYSIS
-    Analysis_Module Analysis;
-    #endif
+#ifdef SUPERNOVA  // TODO refactor this into Analysis module
+  Real countSN;
+  Real countResolved;
+  Real countUnresolved;
+  Real totalEnergy;
+  Real totalMomentum;
+  Real totalUnresEnergy;
+#endif
+  struct Conserved {
+    /*! pointer to conserved variable array on the host */
+    Real *host;
+
+    /*! \var density
+     *  \brief Array containing the density of each cell in the grid */
+    Real *density;
+
+    /*! \var momentum_x
+     *  \brief Array containing the momentum in the x direction of each cell in
+     * the grid */
+    Real *momentum_x;
+
+    /*! \var momentum_y
+     *  \brief Array containing the momentum in the y direction of each cell in
+     * the grid */
+    Real *momentum_y;
+
+    /*! \var momentum_z
+     *  \brief Array containing the momentum in the z direction of each cell in
+     * the grid */
+    Real *momentum_z;
+
+    /*! \var Energy
+     *  \brief Array containing the total Energy of each cell in the grid */
+    Real *Energy;
+
+#ifdef SCALAR
+    /*! \var scalar
+     *  \brief Array containing the values of passive scalar variable(s). */
+    Real *scalar;
+  #ifdef BASIC_SCALAR
+    /*! \var basic_scalar
+     *  \brief Array containing the values of a basic passive scalar variable.
+     */
+    Real *basic_scalar;
+  #endif
+  #ifdef DUST
+    /*! \var dust_density
+     *  \brief Array containing the dust densities.
+     */
+    Real *dust_density;
+  #endif
+#endif  // SCALAR
+
+#ifdef MHD
+    /*! \var magnetic_x \brief Array containing the magnetic field in the x
+     *  direction of each cell in the grid. Note that this is the magnetic
+     *  field at the x+1/2 face of the cell since constrained transport
+     *  requires face centered, not cell centered, magnetic fields */
+    Real *magnetic_x;
+
+    /*! \var magnetic_y \brief Array containing the magnetic field in the y
+     *  direction of each cell in the grid. Note that this is the magnetic
+     *  field at the y+1/2 face of the cell since constrained transport
+     *  requires face centered, not cell centered, magnetic fields */
+    Real *magnetic_y;
+
+    /*! \var magnetic_z \brief Array containing the magnetic field in the z
+     *  direction of each cell in the grid. Note that this is the magnetic
+     *  field at the z+1/2 face of the cell since constrained transport
+     *  requires face centered, not cell centered, magnetic fields */
+    Real *magnetic_z;
+#endif  // MHD
+
+#ifdef DE
+    /*! \var GasEnergy
+     *  \brief Array containing the internal energy of each cell, only tracked
+     separately when using the dual-energy formalism. */
+    Real *GasEnergy;
+#endif  // DE
+
+    /*! \var grav_potential
+     *  \brief Array containing the gravitational potential of each cell, only
+     * tracked separately when using  GRAVITY. */
+    Real *Grav_potential;
 
-    struct Conserved
-    {
-      /*! pointer to conserved variable array on the host */
-      Real *host;
-
-      /*! \var density
-       *  \brief Array containing the density of each cell in the grid */
-      Real *density;
-
-      /*! \var momentum_x
-       *  \brief Array containing the momentum in the x direction of each cell in the grid */
-      Real *momentum_x;
-
-      /*! \var momentum_y
-       *  \brief Array containing the momentum in the y direction of each cell in the grid */
-      Real *momentum_y;
-
-      /*! \var momentum_z
-       *  \brief Array containing the momentum in the z direction of each cell in the grid */
-      Real *momentum_z;
-
-      /*! \var Energy
-       *  \brief Array containing the total Energy of each cell in the grid */
-      Real *Energy;
-
-      #ifdef SCALAR
-      /*! \var scalar
-       *  \brief Array containing the values of the passive scalar variable(s). */
-      Real *scalar;
-      #endif  // SCALAR
-
-      #ifdef MHD
-      /*! \var magnetic_x \brief Array containing the magnetic field in the x
-       *  direction of each cell in the grid. Note that this is the magnetic
-       *  field at the x+1/2 face of the cell since constrained transport
-       *  requires face centered, not cell centered, magnetic fields */
-      Real *magnetic_x;
-
-      /*! \var magnetic_y \brief Array containing the magnetic field in the y
-       *  direction of each cell in the grid. Note that this is the magnetic
-       *  field at the y+1/2 face of the cell since constrained transport
-       *  requires face centered, not cell centered, magnetic fields */
-      Real *magnetic_y;
-
-      /*! \var magnetic_z \brief Array containing the magnetic field in the z
-       *  direction of each cell in the grid. Note that this is the magnetic
-       *  field at the z+1/2 face of the cell since constrained transport
-       *  requires face centered, not cell centered, magnetic fields */
-      Real *magnetic_z;
-      #endif  // MHD
-
-      #ifdef DE
-      /*! \var GasEnergy
-       *  \brief Array containing the internal energy of each cell, only tracked separately when using
-           the dual-energy formalism. */
-      Real *GasEnergy;
-      #endif  // DE
-
-      /*! \var grav_potential
-      *  \brief Array containing the gravitational potential of each cell, only tracked separately when using  GRAVITY. */
-      Real *Grav_potential;
-
-      #ifdef CHEMISTRY_GPU
-      Real *HI_density;
-      Real *HII_density;
-      Real *HeI_density;
-      Real *HeII_density;
-      Real *HeIII_density;
-      Real *e_density;
-      #endif
-
-
-      /*! pointer to conserved variable on device */
-      Real *device;
-      Real *d_density, *d_momentum_x, *d_momentum_y, *d_momentum_z,
-           *d_Energy, *d_scalar, *d_magnetic_x, *d_magnetic_y, *d_magnetic_z,
-           *d_GasEnergy;
-
-       /*! pointer to gravitational potential on device */
-      Real *d_Grav_potential;
-    } C;
-
-
-    /*! \fn Grid3D(void)
-     *  \brief Constructor for the grid */
-    Grid3D(void);
-
-    /*! \fn void Initialize(int nx_in, int ny_in, int nz_in)
-     *  \brief Initialize the grid. */
-    void Initialize(struct parameters *P);
-
-    /*! \fn void AllocateMemory(void)
-     *  \brief Allocate memory for the d, m, E arrays. */
-    void AllocateMemory(void);
-
-    /*! \fn void Set_Initial_Conditions(parameters P)
-     *  \brief Set the initial conditions based on info in the parameters structure. */
-    void Set_Initial_Conditions(parameters P);
-
-    /*! \fn void Get_Position(long i, long j, long k, Real *xpos, Real *ypos, Real *zpos)
-     *  \brief Get the cell-centered position based on cell index */
-    void Get_Position(long i, long j, long k, Real *xpos, Real *ypos, Real *zpos);
-
-    /*! \fn void Set_Domain_Properties(struct parameters P)
-     *  \brief Set local domain properties */
-    void Set_Domain_Properties(struct parameters P);
-
-    /*! \fn void set_dt(Real dti)
-     *  \brief Calculate the timestep. */
-    void set_dt(Real dti);
-
-    #ifdef GRAVITY
-    /*! \fn void set_dt(Real dti)
-     *  \brief Calculate the timestep for Gravity. */
-    void set_dt_Gravity();
-    #endif
+#ifdef CHEMISTRY_GPU
+    Real *HI_density;
+    Real *HII_density;
+    Real *HeI_density;
+    Real *HeII_density;
+    Real *HeIII_density;
+    Real *e_density;
+#endif
 
-    /*! \fn Real calc_dti_CPU_1D()
-     *  \brief Calculate the maximum inverse timestep on 1D, according to the CFL condition (Toro 6.17). */
-    Real calc_dti_CPU_1D();
+    /*! pointer to conserved variable on device */
+    Real *device;
+    Real *d_density, *d_momentum_x, *d_momentum_y, *d_momentum_z, *d_Energy, *d_scalar, *d_basic_scalar,
+        *d_dust_density, *d_magnetic_x, *d_magnetic_y, *d_magnetic_z, *d_GasEnergy;
 
-    /*! \fn Real calc_dti_CPU_2D()
-     *  \brief Calculate the maximum inverse timestep on 2D, according to the CFL condition (Toro 6.17). */
-    Real calc_dti_CPU_2D();
+    /*! pointer to gravitational potential on device */
+    Real *d_Grav_potential;
+  } C;
 
-    /*! \fn Real calc_dti_CPU_3D_function()
-     *  \brief Calculate the maximum inverse timestep on 3D using openMP, according to the CFL condition (Toro 6.17). */
-    Real calc_dti_CPU_3D_function( int g_start, int g_end );
+  /*! \fn Grid3D(void)
+   *  \brief Constructor for the grid */
+  Grid3D(void);
 
-    /*! \fn Real calc_dti_CPU_3D()
-     *  \brief Calculate the maximum inverse timestep on 3D, according to the CFL condition (Toro 6.17). */
-    Real calc_dti_CPU_3D();
+  /*! \fn void Initialize(int nx_in, int ny_in, int nz_in)
+   *  \brief Initialize the grid. */
+  void Initialize(struct Parameters *P);
 
-    /*! \fn Real calc_dti_CPU()
-     *  \brief Calculate the maximum inverse timestep, according to the CFL condition (Toro 6.17). */
-    Real calc_dti_CPU();
+  /*! \fn void AllocateMemory(void)
+   *  \brief Allocate memory for the d, m, E arrays. */
+  void AllocateMemory(void);
 
-    /*! \fn void Update_Grid(void)
-     *  \brief Update the conserved quantities in each cell. */
-    Real Update_Grid(void);
+  /*! \fn void Set_Initial_Conditions(Parameters P )
+   *  \brief Set the initial conditions based on info in the parameters
+   * structure. */
+  void Set_Initial_Conditions(Parameters P);
 
-    /*! \fn void Update_Hydro_Grid(void)
-     *  \brief Do all steps to update the hydro. */
-    Real Update_Hydro_Grid(void);
+  /*! \fn void Get_Position(long i, long j, long k, Real *xpos, Real *ypos, Real
+   * *zpos) \brief Get the cell-centered position based on cell index */
+  void Get_Position(long i, long j, long k, Real *xpos, Real *ypos, Real *zpos);
 
-    void Update_Time();
+  Real Calc_Inverse_Timestep();
 
-     /*! \fn void Write_Header_Text(FILE *fp)
-     *  \brief Write the relevant header info to a text output file. */
-    void Write_Header_Text(FILE *fp);
+  /*! \fn void Set_Domain_Properties(struct Parameters P)
+   *  \brief Set local domain properties */
+  void Set_Domain_Properties(struct Parameters P);
 
-    /*! \fn void Write_Grid_Text(FILE *fp)
-     *  \brief Write the grid to a file, at the current simulation time. */
-    void Write_Grid_Text(FILE *fp);
+  /*! \fn void set_dt(Real dti)
+   *  \brief Calculate the timestep. */
+  void set_dt(Real dti);
 
-    /*! \fn void Write_Header_Binary(FILE *fp)
-     *  \brief Write the relevant header info to a binary output file. */
-    void Write_Header_Binary(FILE *fp);
+#ifdef GRAVITY
+  /*! \fn void set_dt(Real dti)
+   *  \brief Calculate the timestep for Gravity. */
+  void set_dt_Gravity();
+#endif
 
-    /*! \fn void Write_Grid_Binary(FILE *fp)
-     *  \brief Write the grid to a file, at the current simulation time. */
-    void Write_Grid_Binary(FILE *fp);
+  /*! \fn void Execute_Hydro_Integratore_Grid(void)
+   *  \brief Updates cells by executing the hydro integrator. */
+  void Execute_Hydro_Integrator(void);
 
-#ifdef HDF5
-    /*! \fn void Write_Header_HDF5(hid_t file_id)
-     *  \brief Write the relevant header info to the HDF5 file. */
-    void Write_Header_HDF5(hid_t file_id);
+  /*! \fn void Update_Hydro_Grid(void)
+   *  \brief Do all steps to update the hydro. */
+  Real Update_Hydro_Grid(void);
 
-    /*! \fn void Write_Grid_HDF5(hid_t file_id)
-     *  \brief Write the grid to a file, at the current simulation time. */
-    void Write_Grid_HDF5(hid_t file_id);
+  void Update_Time();
+  /*! \fn void Write_Header_Text(FILE *fp)
+   *  \brief Write the relevant header info to a text output file. */
+  void Write_Header_Text(FILE *fp);
 
-    /*! \fn void Write_Projection_HDF5(hid_t file_id)
-     *  \brief Write projected density and temperature data to a file. */
-    void Write_Projection_HDF5(hid_t file_id);
+  /*! \fn void Write_Grid_Text(FILE *fp)
+   *  \brief Write the grid to a file, at the current simulation time. */
+  void Write_Grid_Text(FILE *fp);
 
-    /*! \fn void Write_Header_Rotated_HDF5(hid_t file_id)
-     *  \brief Write the relevant header info to the HDF5 file for rotated projection. */
-    void Write_Header_Rotated_HDF5(hid_t file_id);
+  /*! \fn void Write_Header_Binary(FILE *fp)
+   *  \brief Write the relevant header info to a binary output file. */
+  void Write_Header_Binary(FILE *fp);
 
-    /*! \fn void Write_Rotated_Projection_HDF5(hid_t file_id)
-     *  \brief Write rotated projected data to a file, at the current simulation time. */
-    void Write_Rotated_Projection_HDF5(hid_t file_id);
+  /*! \fn void Write_Grid_Binary(FILE *fp)
+   *  \brief Write the grid to a file, at the current simulation time. */
+  void Write_Grid_Binary(FILE *fp);
 
-    /*! \fn void Write_Slices_HDF5(hid_t file_id)
-     *  \brief Write xy, xz, and yz slices of all data to a file. */
-    void Write_Slices_HDF5(hid_t file_id);
+#ifdef HDF5
+  /*! \fn void Write_Header_HDF5(hid_t file_id)
+   *  \brief Write the relevant header info to the HDF5 file. */
+  void Write_Header_HDF5(hid_t file_id);
 
-#endif
+  /*! \fn void Write_Grid_HDF5(hid_t file_id)
+   *  \brief Write the grid to a file, at the current simulation time. */
+  void Write_Grid_HDF5(hid_t file_id);
 
-    /*! \fn void Read_Grid(struct parameters P)
-     *  \brief Read in grid data from an output file. */
-    void Read_Grid(struct parameters P);
+  /*! \fn void Write_Projection_HDF5(hid_t file_id)
+   *  \brief Write projected density and temperature data to a file. */
+  void Write_Projection_HDF5(hid_t file_id);
 
-    /*! \fn Read_Grid_Binary(FILE *fp)
-     *  \brief Read in grid data from a binary file. */
-    void Read_Grid_Binary(FILE *fp);
+  /*! \fn void Write_Header_Rotated_HDF5(hid_t file_id)
+   *  \brief Write the relevant header info to the HDF5 file for rotated
+   * projection. */
+  void Write_Header_Rotated_HDF5(hid_t file_id);
 
-#ifdef HDF5
-    /*! \fn void Read_Grid_HDF5(hid_t file_id)
-     *  \brief Read in grid data from an hdf5 file. */
-    void Read_Grid_HDF5(hid_t file_id, struct parameters P);
-#endif
+  /*! \fn void Write_Rotated_Projection_HDF5(hid_t file_id)
+   *  \brief Write rotated projected data to a file, at the current simulation
+   * time. */
+  void Write_Rotated_Projection_HDF5(hid_t file_id);
 
-    /*! \fn void Reset(void)
-     *  \brief Reset the Grid3D class. */
-    void Reset(void);
-
-    /*! \fn void FreeMemory(void)
-     *  \brief Free the memory for the density array. */
-    void FreeMemory(void);
-
-    /*! \fn void Constant(Real rho, Real vx, Real vy, Real vz, Real P)
-     *  \brief Constant gas properties. */
-    void Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real By, Real Bz);
-
-    /*! \fn void Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A)
-     *  \brief Sine wave perturbation. */
-    void Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A);
-
-    /*! \fn void Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A)
-     *  \brief Square wave density perturbation with amplitude A*rho in pressure equilibrium. */
-    void Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A);
+  /*! \fn void Write_Slices_HDF5(hid_t file_id)
+   *  \brief Write xy, xz, and yz slices of all data to a file. */
+  void Write_Slices_HDF5(hid_t file_id);
 
-    /*! \fn void Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real Bx_l, Real By_l, Real Bz_l,
-                         Real rho_r, Real vx_r, Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r,
-                         Real diaph)
-     *  \brief Initialize the grid with a Riemann problem. */
-    void Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real Bx_l, Real By_l, Real Bz_l,
-                 Real rho_r, Real vx_r, Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r,
-                 Real diaph);
+#endif
 
-    /*! \fn void Shu_Osher()
-     *  \brief Initialize the grid with the Shu-Osher shock tube problem. See Stone 2008, Section 8.1 */
-    void Shu_Osher();
+  /*! \fn void Read_Grid(struct Parameters P)
+   *  \brief Read in grid data from 1-per-process output files. */
+  void Read_Grid(struct Parameters P);
 
-    /*! \fn void Blast_1D()
-     *  \brief Initialize the grid with two interacting blast waves. See Stone 2008, Section 8.1.*/
-    void Blast_1D();
+  /*! \fn void Read_Grid_Cat(struct Parameters P)
+   *  \brief Read in grid data from a single concatenated output file. */
+  void Read_Grid_Cat(struct Parameters P);
 
-    /*! \fn void KH()
-    *  \brief Initialize the grid with a Kelvin-Helmholtz instability with a discontinuous interface. */
-    void KH();
-
-    /*! \fn void KH_res_ind()
-     *  \brief Initialize the grid with a Kelvin-Helmholtz instability whose modes are resolution independent. */
-    void KH_res_ind();
-
-    /*! \fn void Rayleigh_Taylor()
-    *  \brief Initialize the grid with a 2D Rayleigh-Taylor instability. */
-    void Rayleigh_Taylor();
-
-    /*! \fn void Gresho()
-     *  \brief Initialize the grid with the 2D Gresho problem described in LW03. */
-    void Gresho();
-
-    /*! \fn void Implosion_2D()
-     *  \brief Implosion test described in Liska, 2003. */
-    void Implosion_2D();
-
-    /*! \fn void Explosion_2D()
-     *  \brief Explosion test described in Liska, 2003. */
-    void Explosion_2D();
-
-    /*! \fn void Noh_2D()
-     *  \brief Noh test described in Liska, 2003. */
-    void Noh_2D();
-
-    /*! \fn void Noh_3D()
-     *  \brief Noh test described in Stone, 2008. */
-    void Noh_3D();
-
-    /*! \fn void Disk_2D()
-     *  \brief Initialize the grid with a 2D disk following a Kuzmin profile. */
-    void Disk_2D();
-
-    /*! \fn void Disk_3D(parameters P)
-     *  \brief Initialize the grid with a 3D disk following a Miyamoto-Nagai profile. */
-    void Disk_3D(parameters P);
-
-    /*! \fn void Set_Boundary_Conditions(parameters P)
-     *  \brief Set the boundary conditions based on info in the parameters structure. */
-    void Set_Boundary_Conditions(parameters P);
-
-    /*! \fn void Set_Boundary_Conditions_Grid(parameters P)
-     *  \brief Set the boundary conditions for all components based on info in the parameters structure. */
-    void Set_Boundary_Conditions_Grid( parameters P);
-
-    /*! \fn int Check_Custom_Boundary(int *flags, struct parameters P)
-     *  \brief Check for custom boundary conditions */
-    int Check_Custom_Boundary(int *flags, struct parameters P);
-
-    /*! \fn void Set_Boundaries(int dir, int flags[])
-     *  \brief Apply boundary conditions to the grid. */
-    void Set_Boundaries(int dir, int flags[]);
-
-    /*! \fn Set_Boundary_Extents(int dir, int *imin, int *imax)
-     *  \brief Set the extents of the ghost region we are initializing. */
-    void Set_Boundary_Extents(int dir, int *imin, int *imax);
-
-    /*! \fn void Custom_Boundary(char bcnd[MAXLEN])
-     *  \brief Select appropriate custom boundary function. */
-    void Custom_Boundary(char bcnd[MAXLEN]);
-
-    /*! \fn void Noh_Boundary()
-     *  \brief Apply analytic boundary conditions to +x, +y (and +z) faces,
-        as per the Noh problem in Liska, 2003, or in Stone, 2008. */
-    void Noh_Boundary();
-
-    /*! \fn void Spherical_Overpressure_3D()
-     *  \brief Initialize the grid with a 3D spherical overdensity and overpressue. */
-    void Spherical_Overpressure_3D();
-
-    /*! \fn void Spherical_Overpressure_3D()
-     *  \brief Initialize the grid with a 3D spherical overdensity for gravitational collapse */
-    void Spherical_Overdensity_3D();
-
-    void Clouds();
-    
-    void Uniform_Grid();
-
-    void Zeldovich_Pancake( struct parameters P );
-
-    void Chemistry_Test( struct parameters P );
-
-
-#ifdef   MPI_CHOLLA
-    void Set_Boundaries_MPI(struct parameters P);
-    void Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P);
-    void Load_and_Send_MPI_Comm_Buffers(int dir, int *flags);
-    void Wait_and_Unload_MPI_Comm_Buffers(int dir, int *flags);
-    void Unload_MPI_Comm_Buffers(int index);
-
-    int Load_Hydro_DeviceBuffer_X0(Real *buffer);
-    int Load_Hydro_DeviceBuffer_X1(Real *buffer);
-    int Load_Hydro_DeviceBuffer_Y0(Real *buffer);
-    int Load_Hydro_DeviceBuffer_Y1(Real *buffer);
-    int Load_Hydro_DeviceBuffer_Z0(Real *buffer);
-    int Load_Hydro_DeviceBuffer_Z1(Real *buffer);
+  /*! \fn Read_Grid_Binary(FILE *fp)
+   *  \brief Read in grid data from a binary file. */
+  void Read_Grid_Binary(FILE *fp);
 
-    void Unload_Hydro_DeviceBuffer_X0(Real *buffer);
-    void Unload_Hydro_DeviceBuffer_X1(Real *buffer);
-    void Unload_Hydro_DeviceBuffer_Y0(Real *buffer);
-    void Unload_Hydro_DeviceBuffer_Y1(Real *buffer);
-    void Unload_Hydro_DeviceBuffer_Z0(Real *buffer);
-    void Unload_Hydro_DeviceBuffer_Z1(Real *buffer);
+#ifdef HDF5
+  /*! \fn void Read_Grid_HDF5(hid_t file_id)
+   *  \brief Read in grid data from an hdf5 file. */
+  void Read_Grid_HDF5(hid_t file_id, struct Parameters P);
+#endif
+
+  /*! \fn void Reset(void)
+   *  \brief Reset the Grid3D class. */
+  void Reset(void);
+
+  /*! \fn void FreeMemory(void)
+   *  \brief Free the memory for the density array. */
+  void FreeMemory(void);
+
+  /*!
+   * \brief Constant gas properties.
+   *
+   * \param[in] P the parameters struct.
+   */
+  void Constant(Parameters const &P);
+
+  /*!
+   * \brief Sine wave perturbation.
+   *
+   * \param[in] P the parameters struct.
+   */
+  void Sound_Wave(Parameters const &P);
+
+  /*!
+   * \brief Initialize the grid with a simple linear wave.
+   *
+   * \param[in] P the parameters struct.
+   */
+  void Linear_Wave(Parameters const &P);
+
+  /*!
+   * \brief Square wave density perturbation with amplitude A*rho in pressure
+   * equilibrium.
+   *
+   * \param[in] P the parameters struct.
+   */
+  void Square_Wave(Parameters const &P);
+
+  /*!
+   * \brief Initialize the grid with a Riemann problem.
+   *
+   * \param[in] P the parameters struct.
+   */
+  void Riemann(Parameters const &P);
+
+  /*! \fn void Shu_Osher()
+   *  \brief Initialize the grid with the Shu-Osher shock tube problem. See
+   * Stone 2008, Section 8.1 */
+  void Shu_Osher();
+
+  /*! \fn void Blast_1D()
+   *  \brief Initialize the grid with two interacting blast waves. See Stone
+   * 2008, Section 8.1.*/
+  void Blast_1D();
+
+  /*! \fn void KH()
+   *  \brief Initialize the grid with a Kelvin-Helmholtz instability with a
+   * discontinuous interface. */
+  void KH();
+
+  /*! \fn void KH_res_ind()
+   *  \brief Initialize the grid with a Kelvin-Helmholtz instability whose modes
+   * are resolution independent. */
+  void KH_res_ind();
+
+  /*! \fn void Rayleigh_Taylor()
+   *  \brief Initialize the grid with a 2D Rayleigh-Taylor instability. */
+  void Rayleigh_Taylor();
+
+  /*! \fn void Gresho()
+   *  \brief Initialize the grid with the 2D Gresho problem described in LW03.
+   */
+  void Gresho();
+
+  /*! \fn void Implosion_2D()
+   *  \brief Implosion test described in Liska, 2003. */
+  void Implosion_2D();
+
+  /*! \fn void Explosion_2D()
+   *  \brief Explosion test described in Liska, 2003. */
+  void Explosion_2D();
+
+  /*! \fn void Noh_2D()
+   *  \brief Noh test described in Liska, 2003. */
+  void Noh_2D();
+
+  /*! \fn void Noh_3D()
+   *  \brief Noh test described in Stone, 2008. */
+  void Noh_3D();
+
+  /*! \fn void Disk_2D()
+   *  \brief Initialize the grid with a 2D disk following a Kuzmin profile. */
+  void Disk_2D();
+
+  /*! \fn void Disk_3D(Parameters P )
+   *  \brief Initialize the grid with a 3D disk following a Miyamoto-Nagai
+   * profile. */
+  void Disk_3D(Parameters P);
+
+  /*! \fn void Set_Boundary_Conditions(Parameters P )
+   *  \brief Set the boundary conditions based on info in the parameters
+   * structure. */
+  void Set_Boundary_Conditions(Parameters P);
+
+  /*! \fn void Set_Boundary_Conditions_Grid(Parameters P )
+   *  \brief Set the boundary conditions for all components based on info in the
+   * parameters structure. */
+  void Set_Boundary_Conditions_Grid(Parameters P);
+
+  /*! \fn int Check_Custom_Boundary(int *flags, struct Parameters P)
+   *  \brief Check for custom boundary conditions */
+  int Check_Custom_Boundary(int *flags, struct Parameters P);
+
+  /*! \fn void Set_Boundaries(int dir, int flags[])
+   *  \brief Apply boundary conditions to the grid. */
+  void Set_Boundaries(int dir, int flags[]);
+
+  /*! \fn Set_Boundary_Extents(int dir, int *imin, int *imax)
+   *  \brief Set the extents of the ghost region we are initializing. */
+  void Set_Boundary_Extents(int dir, int *imin, int *imax);
+
+  /*! \fn void Custom_Boundary(char bcnd[MAXLEN])
+   *  \brief Select appropriate custom boundary function. */
+  void Custom_Boundary(char bcnd[MAXLEN]);
+
+  /*! \fn void Wind_Boundary()
+   *  \brief Apply a constant wind to the -x boundary. */
+  void Wind_Boundary();
+
+  /*! \fn void Noh_Boundary()
+   *  \brief Apply analytic boundary conditions to +x, +y (and +z) faces,
+      as per the Noh problem in Liska, 2003, or in Stone, 2008. */
+  void Noh_Boundary();
+
+  /*! \fn void Spherical_Overpressure_3D()
+   *  \brief Initialize the grid with a 3D spherical overdensity and
+   * overpressue. */
+  void Spherical_Overpressure_3D();
+
+  /*! \fn void Spherical_Overpressure_3D()
+   *  \brief Initialize the grid with a 3D spherical overdensity for
+   * gravitational collapse */
+  void Spherical_Overdensity_3D();
+
+  void Clouds();
+
+  void Uniform_Grid();
+
+  void Zeldovich_Pancake(struct Parameters P);
+
+  void Chemistry_Test(struct Parameters P);
+
+#ifdef MHD
+  /*!
+   * \brief Initialize the grid with a circularly polarized Alfven wave. Only options are angle and Vx. See [Gardiner &
+   * Stone 2008](https://arxiv.org/abs/0712.2634) pages 4134-4135 for details.
+   *
+   * \param P The parameters. Only uses Vx, pitch, and yaw
+   */
+  void Circularly_Polarized_Alfven_Wave(struct Parameters const P);
+
+  /*!
+   * \brief Initialize the grid with a advecting field loop. See [Gardiner &
+   * Stone 2008](https://arxiv.org/abs/0712.2634).
+   *
+   * \param P The parameters object
+   */
+  void Advecting_Field_Loop(struct Parameters const P);
+
+  /*!
+   * \brief Initialize the grid with a spherical MHD blast wave. See [Gardiner &
+   * Stone 2008](https://arxiv.org/abs/0712.2634) for details.
+   *
+   * \param P The parameters struct
+   */
+  void MHD_Spherical_Blast(struct Parameters const P);
+
+  /*!
+   * \brief Initialize the grid with the Orszag-Tang Vortex. See [Gardiner & Stone
+   * 2008](https://arxiv.org/abs/0712.2634)
+   *
+   * \param P The parameters.
+   */
+  void Orszag_Tang_Vortex();
+#endif  // MHD
+
+#ifdef MPI_CHOLLA
+  void Set_Boundaries_MPI(struct Parameters P);
+  void Set_Boundaries_MPI_BLOCK(int *flags, struct Parameters P);
+  void Load_and_Send_MPI_Comm_Buffers(int dir, int *flags);
+  void Wait_and_Unload_MPI_Comm_Buffers(int dir, int *flags);
+  void Unload_MPI_Comm_Buffers(int index);
+
+  int Load_Hydro_DeviceBuffer_X0(Real *buffer);
+  int Load_Hydro_DeviceBuffer_X1(Real *buffer);
+  int Load_Hydro_DeviceBuffer_Y0(Real *buffer);
+  int Load_Hydro_DeviceBuffer_Y1(Real *buffer);
+  int Load_Hydro_DeviceBuffer_Z0(Real *buffer);
+  int Load_Hydro_DeviceBuffer_Z1(Real *buffer);
+
+  void Unload_Hydro_DeviceBuffer_X0(Real *buffer);
+  void Unload_Hydro_DeviceBuffer_X1(Real *buffer);
+  void Unload_Hydro_DeviceBuffer_Y0(Real *buffer);
+  void Unload_Hydro_DeviceBuffer_Y1(Real *buffer);
+  void Unload_Hydro_DeviceBuffer_Z0(Real *buffer);
+  void Unload_Hydro_DeviceBuffer_Z1(Real *buffer);
 #endif /*MPI_CHOLLA*/
 
-  #ifdef GRAVITY
-  void Initialize_Gravity( struct parameters *P );
-  void Compute_Gravitational_Potential( struct parameters *P );
-  void Copy_Hydro_Density_to_Gravity_Function( int g_start, int g_end);
+#ifdef GRAVITY
+  void Initialize_Gravity(struct Parameters *P);
+  void Compute_Gravitational_Potential(struct Parameters *P);
+  void Copy_Hydro_Density_to_Gravity_Function(int g_start, int g_end);
   void Copy_Hydro_Density_to_Gravity();
-  void Extrapolate_Grav_Potential_Function( int g_start, int g_end );
+  void Extrapolate_Grav_Potential_Function(int g_start, int g_end);
   void Extrapolate_Grav_Potential();
-  void Set_Potential_Boundaries_Periodic( int direction, int side, int *flags );
-  int Load_Gravity_Potential_To_Buffer( int direction, int side, Real *buffer, int buffer_start  );
-  void Unload_Gravity_Potential_from_Buffer( int direction, int side, Real *buffer, int buffer_start  );
-  void Set_Potential_Boundaries_Isolated( int direction, int side, int *flags );
-  void Compute_Potential_Boundaries_Isolated( int dir, struct parameters *P );
-  void Compute_Potential_Isolated_Boundary( int direction, int side, int bc_potential_type );
+  void Set_Potential_Boundaries_Periodic(int direction, int side, int *flags);
+  int Load_Gravity_Potential_To_Buffer(int direction, int side, Real *buffer, int buffer_start);
+  void Unload_Gravity_Potential_from_Buffer(int direction, int side, Real *buffer, int buffer_start);
+  void Set_Potential_Boundaries_Isolated(int direction, int side, int *flags);
+  void Compute_Potential_Boundaries_Isolated(int dir, struct Parameters *P);
+  void Compute_Potential_Isolated_Boundary(int direction, int side, int bc_potential_type);
   #ifdef SOR
-  void Get_Potential_SOR( Real Grav_Constant, Real dens_avrg, Real current_a, struct parameters *P );
-  int Load_Poisson_Boundary_To_Buffer( int direction, int side, Real *buffer  );
-  void Unload_Poisson_Boundary_From_Buffer( int direction, int side, Real *buffer_host  );
+  void Get_Potential_SOR(Real Grav_Constant, Real dens_avrg, Real current_a, struct Parameters *P);
+  int Load_Poisson_Boundary_To_Buffer(int direction, int side, Real *buffer);
+  void Unload_Poisson_Boundary_From_Buffer(int direction, int side, Real *buffer_host);
   #endif
   #ifdef GRAVITY_GPU
   void Copy_Hydro_Density_to_Gravity_GPU();
   void Extrapolate_Grav_Potential_GPU();
-  int Load_Gravity_Potential_To_Buffer_GPU( int direction, int side, Real *buffer, int buffer_start  );
-  void Unload_Gravity_Potential_from_Buffer_GPU( int direction, int side, Real *buffer, int buffer_start  );
-  void Set_Potential_Boundaries_Isolated_GPU( int direction, int side, int *flags );
-  void Set_Potential_Boundaries_Periodic_GPU( int direction, int side, int *flags );
+  int Load_Gravity_Potential_To_Buffer_GPU(int direction, int side, Real *buffer, int buffer_start);
+  void Unload_Gravity_Potential_from_Buffer_GPU(int direction, int side, Real *buffer, int buffer_start);
+  void Set_Potential_Boundaries_Isolated_GPU(int direction, int side, int *flags);
+  void Set_Potential_Boundaries_Periodic_GPU(int direction, int side, int *flags);
   #endif
 
-  #endif//GRAVITY
+#endif  // GRAVITY
 
-  #ifdef GRAVITY_ANALYTIC_COMP
-  void Add_Analytic_Potential(struct parameters *P);
-  void Add_Analytic_Galaxy_Potential(int g_start, int g_end, DiskGalaxy& gal);
-  #endif //GRAVITY_ANALYTIC_COMP
+#ifdef GRAVITY_ANALYTIC_COMP
+  void Add_Analytic_Potential();
+  void Add_Analytic_Potential(int g_start, int g_end);
+  void Setup_Analytic_Potential(struct Parameters *P);
+  void Setup_Analytic_Galaxy_Potential(int g_start, int g_end, DiskGalaxy &gal);
+  #ifdef GRAVITY_GPU
+  void Add_Analytic_Potential_GPU();
+  #endif
+#endif  // GRAVITY_ANALYTIC_COMP
 
-  #ifdef PARTICLES
-  void Initialize_Particles( struct parameters *P );
+#ifdef PARTICLES
+  void Initialize_Particles(struct Parameters *P);
   void Initialize_Uniform_Particles();
-  void Copy_Particles_Density_function( int g_start, int g_end );
+  void Copy_Particles_Density_function(int g_start, int g_end);
   void Copy_Particles_Density();
-  void Copy_Particles_Density_to_Gravity(struct parameters P);
-  void Set_Particles_Density_Boundaries_Periodic( int direction, int side );
-  void Transfer_Particles_Boundaries( struct parameters P );
-  Real Update_Grid_and_Particles_KDK( struct parameters P );
-  void Set_Particles_Boundary( int dir, int side);
-  void Set_Particles_Open_Boundary(int dir, int side);
+  void Copy_Particles_Density_to_Gravity(struct Parameters P);
+  void Set_Particles_Density_Boundaries_Periodic(int direction, int side);
+  void Transfer_Particles_Boundaries(struct Parameters P);
+  Real Update_Grid_and_Particles_KDK(struct Parameters P);
+  void Set_Particles_Boundary(int dir, int side);
+  #ifdef PARTICLES_CPU
+  void Set_Particles_Open_Boundary_CPU(int dir, int side);
+  #endif
   #ifdef MPI_CHOLLA
-  int Load_Particles_Density_Boundary_to_Buffer( int direction, int side, Real *buffer );
-  void Unload_Particles_Density_Boundary_From_Buffer( int direction, int side, Real *buffer );
-  void Load_and_Send_Particles_X0( int ireq_n_particles, int ireq_particles_transfer );
-  void Load_and_Send_Particles_X1( int ireq_n_particles, int ireq_particles_transfer );
-  void Load_and_Send_Particles_Y0( int ireq_n_particles, int ireq_particles_transfer );
-  void Load_and_Send_Particles_Y1( int ireq_n_particles, int ireq_particles_transfer );
-  void Load_and_Send_Particles_Z0( int ireq_n_particles, int ireq_particles_transfer );
-  void Load_and_Send_Particles_Z1( int ireq_n_particles, int ireq_particles_transfer );
-  void Unload_Particles_from_Buffer_X0( int *flags );
-  void Unload_Particles_from_Buffer_X1( int *flags );
-  void Unload_Particles_from_Buffer_Y0( int *flags );
-  void Unload_Particles_from_Buffer_Y1( int *flags );
-  void Unload_Particles_from_Buffer_Z0( int *flags );
-  void Unload_Particles_from_Buffer_Z1( int *flags );
+  int Load_Particles_Density_Boundary_to_Buffer(int direction, int side, Real *buffer);
+  void Unload_Particles_Density_Boundary_From_Buffer(int direction, int side, Real *buffer);
+  void Load_and_Send_Particles_X0(int ireq_n_particles, int ireq_particles_transfer);
+  void Load_and_Send_Particles_X1(int ireq_n_particles, int ireq_particles_transfer);
+  void Load_and_Send_Particles_Y0(int ireq_n_particles, int ireq_particles_transfer);
+  void Load_and_Send_Particles_Y1(int ireq_n_particles, int ireq_particles_transfer);
+  void Load_and_Send_Particles_Z0(int ireq_n_particles, int ireq_particles_transfer);
+  void Load_and_Send_Particles_Z1(int ireq_n_particles, int ireq_particles_transfer);
+  void Unload_Particles_from_Buffer_X0(int *flags);
+  void Unload_Particles_from_Buffer_X1(int *flags);
+  void Unload_Particles_from_Buffer_Y0(int *flags);
+  void Unload_Particles_from_Buffer_Y1(int *flags);
+  void Unload_Particles_from_Buffer_Z0(int *flags);
+  void Unload_Particles_from_Buffer_Z1(int *flags);
   void Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(int dir, int *flags);
   void Load_NTtransfer_and_Request_Receive_Particles_Transfer(int index, int *ireq_particles_transfer);
   void Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(int dir, int *flags);
-  void Unload_Particles_From_Buffers_BLOCK(int index, int *flags );
+  void Unload_Particles_From_Buffers_BLOCK(int index, int *flags);
   void Finish_Particles_Transfer();
-  #endif//MPI_CHOLLA
-  void Transfer_Particles_Density_Boundaries( struct parameters P );
-  void Copy_Particles_Density_Buffer_Device_to_Host( int direction, int side, Real *buffer_d, Real *buffer_h );
-  // void Transfer_Particles_Boundaries( struct parameters P );
-  void WriteData_Particles(  struct parameters P, int nfile);
-  void OutputData_Particles(  struct parameters P, int nfile);
-  void Load_Particles_Data(  struct parameters P);
+  #endif  // MPI_CHOLLA
+  void Transfer_Particles_Density_Boundaries(struct Parameters P);
+  void Copy_Particles_Density_Buffer_Device_to_Host(int direction, int side, Real *buffer_d, Real *buffer_h);
+  // void Transfer_Particles_Boundaries( struct Parameters P );
+  void WriteData_Particles(struct Parameters P, int nfile);
+  void OutputData_Particles(struct Parameters P, int nfile);
+  void Load_Particles_Data(struct Parameters P);
   #ifdef HDF5
-  void Write_Particles_Header_HDF5( hid_t file_id);
-  void Write_Particles_Data_HDF5( hid_t file_id);
+  void Write_Particles_Header_HDF5(hid_t file_id);
+  void Write_Particles_Data_HDF5(hid_t file_id);
   void Load_Particles_Data_HDF5(hid_t file_id, int nfile);
-  #endif//HDF5
-  void Get_Gravity_Field_Particles_function( int g_start, int g_end );
+  #endif  // HDF5
+  void Get_Gravity_Field_Particles_function(int g_start, int g_end);
   void Get_Gravity_Field_Particles();
-  void Get_Gravity_CIC_function( part_int_t p_start, part_int_t p_end );
+  void Get_Gravity_CIC_function(part_int_t p_start, part_int_t p_end);
   void Get_Gravity_CIC();
-  void Advance_Particles_KDK_Step1( );
-  void Advance_Particles_KDK_Step2( );
-  void Advance_Particles_KDK_Step1_function( part_int_t p_start, part_int_t p_end );
-  void Advance_Particles_KDK_Step2_function( part_int_t p_start, part_int_t p_end );
+  void Advance_Particles_KDK_Step1();
+  void Advance_Particles_KDK_Step2();
+  void Advance_Particles_KDK_Step1_function(part_int_t p_start, part_int_t p_end);
+  void Advance_Particles_KDK_Step2_function(part_int_t p_start, part_int_t p_end);
   void Get_Particles_Acceleration();
-  void Advance_Particles( int N_KDK_step );
-  Real Calc_Particles_dt_function( part_int_t p_start, part_int_t p_end );
+  void Advance_Particles(int N_KDK_step);
+  Real Calc_Particles_dt_function(part_int_t p_start, part_int_t p_end);
   Real Calc_Particles_dt();
   #ifdef PARTICLES_GPU
   Real Calc_Particles_dt_GPU();
   void Advance_Particles_KDK_Step1_GPU();
   void Advance_Particles_KDK_Step2_GPU();
-  void Set_Particles_Boundary_GPU( int dir, int side);
-  void Set_Particles_Density_Boundaries_Periodic_GPU( int direction, int side );
-  #endif//PARTICLES_GPU
+  void Set_Particles_Boundary_GPU(int dir, int side);
+  void Set_Particles_Density_Boundaries_Periodic_GPU(int direction, int side);
+  #endif  // PARTICLES_GPU
   #ifdef GRAVITY_GPU
   void Copy_Potential_From_GPU();
   void Copy_Particles_Density_to_GPU();
   void Copy_Particles_Density_GPU();
-  int Load_Particles_Density_Boundary_to_Buffer_GPU( int direction, int side, Real *buffer  );
-  void Unload_Particles_Density_Boundary_From_Buffer_GPU( int direction, int side, Real *buffer  );
-  #endif//GRAVITY_GPU
-  #endif//PARTICLES
-
-  #ifdef COSMOLOGY
-  void Initialize_Cosmology( struct parameters *P );
-  void Change_DM_Frame_System( bool forward );
-  void Change_GAS_Frame_System( bool forward );
-  void Change_GAS_Frame_System_GPU( bool forward );
-  void Change_Cosmological_Frame_Sytem( bool forward );
-  void Advance_Particles_KDK_Cosmo_Step1_function( part_int_t p_start, part_int_t p_end );
-  void Advance_Particles_KDK_Cosmo_Step2_function( part_int_t p_start, part_int_t p_end );
-  Real Calc_Particles_dt_Cosmo_function( part_int_t p_start, part_int_t p_end );
+  int Load_Particles_Density_Boundary_to_Buffer_GPU(int direction, int side, Real *buffer);
+  void Unload_Particles_Density_Boundary_From_Buffer_GPU(int direction, int side, Real *buffer);
+  #endif  // GRAVITY_GPU
+#endif    // PARTICLES
+
+#ifdef COSMOLOGY
+  void Initialize_Cosmology(struct Parameters *P);
+  void Change_DM_Frame_System(bool forward);
+  void Change_GAS_Frame_System(bool forward);
+  void Change_GAS_Frame_System_GPU(bool forward);
+  void Change_Cosmological_Frame_Sytem(bool forward);
+  void Advance_Particles_KDK_Cosmo_Step1_function(part_int_t p_start, part_int_t p_end);
+  void Advance_Particles_KDK_Cosmo_Step2_function(part_int_t p_start, part_int_t p_end);
+  Real Calc_Particles_dt_Cosmo_function(part_int_t p_start, part_int_t p_end);
   Real Calc_Particles_dt_Cosmo();
   #ifdef PARTICLES_GPU
   void Advance_Particles_KDK_Cosmo_Step1_GPU();
   void Advance_Particles_KDK_Cosmo_Step2_GPU();
-  #endif//PARTICLES_GPU
-  #endif//COSMOLOGY
+  #endif  // PARTICLES_GPU
+#endif    // COSMOLOGY
 
-  #ifdef COOLING_GRACKLE
-  void Initialize_Grackle( struct parameters *P );
+#ifdef COOLING_GRACKLE
+  void Initialize_Grackle(struct Parameters *P);
   void Allocate_Memory_Grackle();
   void Initialize_Fields_Grackle();
-  void Copy_Fields_To_Grackle_function( int g_start, int g_end );
+  void Copy_Fields_To_Grackle_function(int g_start, int g_end);
   void Copy_Fields_To_Grackle();
-  void Update_Internal_Energy_function( int g_start, int g_end );
+  void Update_Internal_Energy_function(int g_start, int g_end);
   void Update_Internal_Energy();
   void Do_Cooling_Step_Grackle();
-  #endif
+#endif
 
-  #ifdef CHEMISTRY_GPU
-  void Initialize_Chemistry( struct parameters *P );
-  void Compute_Gas_Temperature(  Real *temperature, bool convert_cosmo_units  );
+#ifdef CHEMISTRY_GPU
+  void Initialize_Chemistry(struct Parameters *P);
+  void Compute_Gas_Temperature(Real *temperature, bool convert_cosmo_units);
   void Update_Chemistry();
-  #endif
+#endif
 
-  #ifdef ANALYSIS
-  void Initialize_Analysis_Module( struct parameters *P );
-  void Compute_and_Output_Analysis( struct parameters *P );
-  void Output_Analysis( struct parameters *P );
-  void Write_Analysis_Header_HDF5( hid_t file_id );
-  void Write_Analysis_Data_HDF5( hid_t file_id );
+#ifdef ANALYSIS
+  void Initialize_AnalysisModule(struct Parameters *P);
+  void Compute_and_Output_Analysis(struct Parameters *P);
+  void Output_Analysis(struct Parameters *P);
+  void Write_Analysis_Header_HDF5(hid_t file_id);
+  void Write_Analysis_Data_HDF5(hid_t file_id);
 
   #ifdef PHASE_DIAGRAM
   void Compute_Phase_Diagram();
   #endif
 
   #ifdef LYA_STATISTICS
-  void Populate_Lya_Skewers_Local( int axis );
-  void Compute_Transmitted_Flux_Skewer( int skewer_id, int axis );
-  void Compute_Lya_Statistics( );
-  void Compute_Flux_Power_Spectrum_Skewer( int skewer_id, int axis );
-  void Initialize_Power_Spectrum_Measurements( int axis );
-  #ifdef OUTPUT_SKEWERS
-  void Output_Skewers_File( struct parameters *P );
-  void Write_Skewers_Header_HDF5( hid_t file_id );
-  void Write_Skewers_Data_HDF5( hid_t file_id );
-  #endif
-  #endif//LYA_STATISTICS
-  #endif//ANALYSIS
-
-  #ifdef PARTICLES
-  #ifdef DE
-  #ifdef PARTICLE_AGE
-  void Cluster_Feedback();
-  void Cluster_Feedback_Function(part_int_t p_start, part_int_t p_end);
-  #endif
-  #endif
-  #endif
-
+  void Populate_Lya_Skewers_Local(int axis);
+  void Compute_Transmitted_Flux_Skewer(int skewer_id, int axis);
+  void Compute_Lya_Statistics();
+  void Compute_Flux_Power_Spectrum_Skewer(int skewer_id, int axis);
+  void Initialize_Power_Spectrum_Measurements(int axis);
+    #ifdef OUTPUT_SKEWERS
+  void Output_Skewers_File(struct Parameters *P);
+  void Write_Skewers_Header_HDF5(hid_t file_id);
+  void Write_Skewers_Data_HDF5(hid_t file_id);
+    #endif
+  #endif  // LYA_STATISTICS
+#endif    // ANALYSIS
 };
 
 // typedef for Grid3D_PointerMemberFunction
 typedef void (Grid3D::*Grid3D_PMF_UnloadHydroBuffer)(Real *);
-typedef void (Grid3D::*Grid3D_PMF_UnloadGravityPotential)
-               (int, int, Real *, int);
-typedef void (Grid3D::*Grid3D_PMF_UnloadParticleDensity)
-               (int, int, Real *);
+typedef void (Grid3D::*Grid3D_PMF_UnloadGravityPotential)(int, int, Real *, int);
+typedef void (Grid3D::*Grid3D_PMF_UnloadParticleDensity)(int, int, Real *);
 
-#endif //GRID3D_H
+#endif  // GRID3D_H
diff --git a/src/grid/grid_enum.h b/src/grid/grid_enum.h
new file mode 100644
index 000000000..15e1d604a
--- /dev/null
+++ b/src/grid/grid_enum.h
@@ -0,0 +1,113 @@
+#pragma once
+
+// An enum which holds offsets for grid quantities
+// In the final form of this approach, this file will also set nfields (not yet)
+// and NSCALARS (done) so that adding a field only requires registering it here:
+// grid knows to allocate memory based on nfields and NSCALARS
+// and values can be accessed with density[id + ncells*grid_enum::enum_name]
+// example: C.device[id + H.n_cells*grid_enum::basic_scalar]
+
+// enum notes:
+// For advanced devs: must be "unscoped" to be implicitly treated as int: this
+// means cannot use "enum class" or "enum struct" Wrapped in namespace to give
+// it an effective scope to prevent collisions enum values (i.e. density) belong
+// to their enclosing scope, which necessitates the namespace wrapping
+// --otherwise "density" would be available in global scope
+// ": int" forces underlying type to be int
+
+namespace grid_enum
+{
+enum : int {
+
+  // Don't change order of hydro quantities until all of hydro is made
+  // consistent with grid_enum (if ever) because enum values depend on order
+  density,
+  momentum_x,
+  momentum_y,
+  momentum_z,
+  Energy,
+
+  // Code assumes scalars are a contiguous block
+  // Always define scalar, scalar_minus_1, finalscalar_plus_1, finalscalar to
+  // compute NSCALARS
+  scalar,
+  scalar_minus_1 = scalar - 1,  // so that next enum item starts at same index as scalar
+
+#ifdef SCALAR
+  // Add scalars here, wrapped appropriately with ifdefs:
+  #ifdef BASIC_SCALAR
+  basic_scalar,
+  #endif
+
+  #if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU)
+  HI_density,
+  HII_density,
+  HeI_density,
+  HeII_density,
+  HeIII_density,
+  e_density,
+    #ifdef GRACKLE_METALS
+  metal_density,
+    #endif
+  #endif
+
+  #ifdef DUST
+  dust_density,
+  #endif  // DUST
+
+#endif  // SCALAR
+
+  finalscalar_plus_1,                    // needed to calculate NSCALARS
+  finalscalar = finalscalar_plus_1 - 1,  // resets enum to finalscalar so fields afterwards are correct
+// so that anything after starts with scalar + NSCALARS
+
+#ifdef MHD
+  magnetic_x,
+  magnetic_y,
+  magnetic_z,
+#endif
+#ifdef DE
+  GasEnergy,
+#endif
+  num_fields,
+
+  // Aliases and manually computed enums
+  nscalars = finalscalar_plus_1 - scalar,
+
+#ifdef MHD
+  num_flux_fields      = num_fields - 1,
+  num_interface_fields = num_fields - 1,
+#else
+  num_flux_fields      = num_fields,
+  num_interface_fields = num_fields,
+#endif  // MHD
+
+#ifdef MHD
+  magnetic_start = magnetic_x,
+  magnetic_end   = magnetic_z,
+
+  ct_elec_x = 0,
+  ct_elec_y = 1,
+  ct_elec_z = 2,
+
+  // Note that the direction of the flux, the suffix _? indicates the direction
+  // of the electric field, not the magnetic flux
+  fluxX_magnetic_z = magnetic_start,
+  fluxX_magnetic_y = magnetic_start + 1,
+  fluxY_magnetic_x = magnetic_start,
+  fluxY_magnetic_z = magnetic_start + 1,
+  fluxZ_magnetic_y = magnetic_start,
+  fluxZ_magnetic_x = magnetic_start + 1,
+
+  Q_x_magnetic_y = magnetic_start,
+  Q_x_magnetic_z = magnetic_start + 1,
+  Q_y_magnetic_z = magnetic_start,
+  Q_y_magnetic_x = magnetic_start + 1,
+  Q_z_magnetic_x = magnetic_start,
+  Q_z_magnetic_y = magnetic_start + 1
+#endif  // MHD
+
+};
+}  // namespace grid_enum
+
+#define NSCALARS grid_enum::nscalars
diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp
index 1a0a03381..af558be8f 100644
--- a/src/grid/initial_conditions.cpp
+++ b/src/grid/initial_conditions.cpp
@@ -1,106 +1,112 @@
 /*! \file initial_conditions.cpp
  *  \brief Definitions of initial conditions for different tests.
-           Note that the grid is mapped to 1D as i + (x_dim)*j + (x_dim*y_dim)*k.
-           Functions are members of the Grid3D class. */
+           Note that the grid is mapped to 1D as i + (x_dim)*j +
+ (x_dim*y_dim)*k. Functions are members of the Grid3D class. */
 
-
-#include <stdlib.h>
 #include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 #include <time.h>
+
+#include <algorithm>
+#include <cmath>
+#include <fstream>
+#include <iostream>
+
 #include "../global/global.h"
 #include "../grid/grid3D.h"
-#include "../mpi/mpi_routines.h"
 #include "../io/io.h"
+#include "../mpi/mpi_routines.h"
 #include "../utils/error_handling.h"
-#include <stdio.h>
-#include <cmath>
-#include <iostream>
-#include <fstream>
-
-using namespace std;
-
-/*! \fn void Set_Initial_Conditions(parameters P)
- *  \brief Set the initial conditions based on info in the parameters structure. */
-void Grid3D::Set_Initial_Conditions(parameters P) {
-
+#include "../utils/hydro_utilities.h"
+#include "../utils/math_utilities.h"
+#include "../utils/mhd_utilities.h"
+
+/*! \fn void Set_Initial_Conditions(Parameters P )
+ *  \brief Set the initial conditions based on info in the parameters structure.
+ */
+void Grid3D::Set_Initial_Conditions(Parameters P)
+{
   Set_Domain_Properties(P);
   Set_Gammas(P.gamma);
 
-  if (strcmp(P.init, "Constant")==0) {
-    Constant(P.rho, P.vx, P.vy, P.vz, P.P, P.Bx, P.By, P.Bz);
-  } else if (strcmp(P.init, "Sound_Wave")==0) {
-    Sound_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A);
-  } else if (strcmp(P.init, "Square_Wave")==0) {
-    Square_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A);
-  } else if (strcmp(P.init, "Riemann")==0) {
-    Riemann(P.rho_l, P.vx_l, P.vy_l, P.vz_l, P.P_l, P.Bx_l, P.By_l, P.Bz_l,
-            P.rho_r, P.vx_r, P.vy_r, P.vz_r, P.P_r, P.Bx_r, P.By_r, P.Bz_r,
-            P.diaph);
-  } else if (strcmp(P.init, "Shu_Osher")==0) {
+  if (strcmp(P.init, "Constant") == 0) {
+    Constant(P);
+  } else if (strcmp(P.init, "Sound_Wave") == 0) {
+    Sound_Wave(P);
+  } else if (strcmp(P.init, "Linear_Wave") == 0) {
+    Linear_Wave(P);
+  } else if (strcmp(P.init, "Square_Wave") == 0) {
+    Square_Wave(P);
+  } else if (strcmp(P.init, "Riemann") == 0) {
+    Riemann(P);
+  } else if (strcmp(P.init, "Shu_Osher") == 0) {
     Shu_Osher();
-  } else if (strcmp(P.init, "Blast_1D")==0) {
+  } else if (strcmp(P.init, "Blast_1D") == 0) {
     Blast_1D();
-  } else if (strcmp(P.init, "KH")==0) {
+  } else if (strcmp(P.init, "KH") == 0) {
     KH();
-  } else if (strcmp(P.init, "KH_res_ind")==0) {
+  } else if (strcmp(P.init, "KH_res_ind") == 0) {
     KH_res_ind();
-  } else if (strcmp(P.init, "Rayleigh_Taylor")==0) {
+  } else if (strcmp(P.init, "Rayleigh_Taylor") == 0) {
     Rayleigh_Taylor();
-  } else if (strcmp(P.init, "Implosion_2D")==0) {
+  } else if (strcmp(P.init, "Implosion_2D") == 0) {
     Implosion_2D();
-  } else if (strcmp(P.init, "Gresho")==0) {
+  } else if (strcmp(P.init, "Gresho") == 0) {
     Gresho();
-  } else if (strcmp(P.init, "Noh_2D")==0) {
+  } else if (strcmp(P.init, "Noh_2D") == 0) {
     Noh_2D();
-  } else if (strcmp(P.init, "Noh_3D")==0) {
+  } else if (strcmp(P.init, "Noh_3D") == 0) {
     Noh_3D();
-  } else if (strcmp(P.init, "Disk_2D")==0) {
+  } else if (strcmp(P.init, "Disk_2D") == 0) {
     Disk_2D();
-  } else if (strcmp(P.init, "Disk_3D")==0) {
-    Disk_3D(P);
-  } else if (strcmp(P.init, "Disk_3D_particles")==0) {
-    #ifndef ONLY_PARTICLES
+  } else if (strcmp(P.init, "Disk_3D") == 0 || strcmp(P.init, "Disk_3D_particles") == 0) {
     Disk_3D(P);
-    #else
-    // Initialize a m hydro grid when only integrating particles
-    Uniform_Grid();
-    #endif
-  } else if (strcmp(P.init, "Spherical_Overpressure_3D")==0) {
+  } else if (strcmp(P.init, "Spherical_Overpressure_3D") == 0) {
     Spherical_Overpressure_3D();
-  } else if (strcmp(P.init, "Spherical_Overdensity_3D")==0) {
-    Spherical_Overdensity_3D();    
-  } else if (strcmp(P.init, "Clouds")==0) {
-    Clouds();    
-  } else if (strcmp(P.init, "Read_Grid")==0) {
-    #ifndef ONLY_PARTICLES
+  } else if (strcmp(P.init, "Spherical_Overdensity_3D") == 0) {
+    Spherical_Overdensity_3D();
+  } else if (strcmp(P.init, "Clouds") == 0) {
+    Clouds();
+  } else if (strcmp(P.init, "Read_Grid") == 0) {
+#ifndef ONLY_PARTICLES
     Read_Grid(P);
-    #else  // ONLY_PARTICLES
+#else   // ONLY_PARTICLES
     // Initialize a uniform hydro grid when only integrating particles
     Uniform_Grid();
-    #endif  // ONLY_PARTICLES
-  } else if (strcmp(P.init, "Uniform")==0) {
+#endif  // ONLY_PARTICLES
+  } else if (strcmp(P.init, "Read_Grid_Cat") == 0) {
+    Read_Grid_Cat(P);
+  } else if (strcmp(P.init, "Uniform") == 0) {
     Uniform_Grid();
-  } else if (strcmp(P.init, "Zeldovich_Pancake")==0) {
+  } else if (strcmp(P.init, "Zeldovich_Pancake") == 0) {
     Zeldovich_Pancake(P);
-  } else if (strcmp(P.init, "Chemistry_Test")==0) {
+  } else if (strcmp(P.init, "Chemistry_Test") == 0) {
     Chemistry_Test(P);
+#ifdef MHD
+  } else if (strcmp(P.init, "Circularly_Polarized_Alfven_Wave") == 0) {
+    Circularly_Polarized_Alfven_Wave(P);
+  } else if (strcmp(P.init, "Advecting_Field_Loop") == 0) {
+    Advecting_Field_Loop(P);
+  } else if (strcmp(P.init, "MHD_Spherical_Blast") == 0) {
+    MHD_Spherical_Blast(P);
+  } else if (strcmp(P.init, "Orszag_Tang_Vortex") == 0) {
+    Orszag_Tang_Vortex();
+#endif  // MHD
   } else {
-    chprintf ("ABORT: %s: Unknown initial conditions!\n", P.init);
+    chprintf("ABORT: %s: Unknown initial conditions!\n", P.init);
     chexit(-1);
   }
 
-  if ( C.device != NULL )
-    {
-    CudaSafeCall(
-      cudaMemcpy(C.device, C.density, H.n_fields*H.n_cells*sizeof(Real),
-                 cudaMemcpyHostToDevice) );
-    }
+  if (C.device != NULL) {
+    GPU_Error_Check(cudaMemcpy(C.device, C.density, H.n_fields * H.n_cells * sizeof(Real), cudaMemcpyHostToDevice));
+  }
 }
 
-/*! \fn void Set_Domain_Properties(struct parameters P)
+/*! \fn void Set_Domain_Properties(struct Parameters P)
  *  \brief Set local domain properties */
-void Grid3D::Set_Domain_Properties(struct parameters P)
+void Grid3D::Set_Domain_Properties(struct Parameters P)
 {
   // Global Boundary Coordinates
   H.xbound = P.xmin;
@@ -113,9 +119,9 @@ void Grid3D::Set_Domain_Properties(struct parameters P)
   H.zdglobal = P.zlen;
 
 #ifndef MPI_CHOLLA
-  Real nx_param = (Real) (H.nx - 2*H.n_ghost);
-  Real ny_param = (Real) (H.ny - 2*H.n_ghost);
-  Real nz_param = (Real) (H.nz - 2*H.n_ghost);
+  Real nx_param = (Real)(H.nx - 2 * H.n_ghost);
+  Real ny_param = (Real)(H.ny - 2 * H.n_ghost);
+  Real nz_param = (Real)(H.nz - 2 * H.n_ghost);
 
   // Local Boundary Coordinates
   H.xblocal = H.xbound;
@@ -123,13 +129,13 @@ void Grid3D::Set_Domain_Properties(struct parameters P)
   H.zblocal = H.zbound;
 
   H.xblocal_max = H.xblocal + P.xlen;
-  H.yblocal_max = H.yblocal + P.ylen; 
-  H.zblocal_max = H.zblocal + P.zlen; 
+  H.yblocal_max = H.yblocal + P.ylen;
+  H.zblocal_max = H.zblocal + P.zlen;
 
 #else
-  Real nx_param = (Real) nx_global;
-  Real ny_param = (Real) ny_global;
-  Real nz_param = (Real) nz_global;
+  Real nx_param = (Real)nx_global;
+  Real ny_param = (Real)ny_global;
+  Real nz_param = (Real)nz_global;
 
   // Local Boundary Coordinates
   /*
@@ -137,47 +143,41 @@ void Grid3D::Set_Domain_Properties(struct parameters P)
   H.yblocal = H.ybound + P.ylen * ((Real) ny_local_start) / ny_param;
   H.zblocal = H.zbound + P.zlen * ((Real) nz_local_start) / nz_param;
   */
-  H.xblocal = H.xbound + ((Real) nx_local_start) * (P.xlen / nx_param);
-  H.yblocal = H.ybound + ((Real) ny_local_start) * (P.ylen / ny_param);
-  H.zblocal = H.zbound + ((Real) nz_local_start) * (P.zlen / nz_param);
+  H.xblocal = H.xbound + ((Real)nx_local_start) * (P.xlen / nx_param);
+  H.yblocal = H.ybound + ((Real)ny_local_start) * (P.ylen / ny_param);
+  H.zblocal = H.zbound + ((Real)nz_local_start) * (P.zlen / nz_param);
 
-  H.xblocal_max = H.xbound + ((Real) (nx_local_start + H.nx - 2*H.n_ghost)) * (P.xlen / nx_param);
-  H.yblocal_max = H.ybound + ((Real) (ny_local_start + H.ny - 2*H.n_ghost)) * (P.ylen / ny_param);
-  H.zblocal_max = H.zbound + ((Real) (nz_local_start + H.nz - 2*H.n_ghost)) * (P.zlen / nz_param);
+  H.xblocal_max = H.xbound + ((Real)(nx_local_start + H.nx - 2 * H.n_ghost)) * (P.xlen / nx_param);
+  H.yblocal_max = H.ybound + ((Real)(ny_local_start + H.ny - 2 * H.n_ghost)) * (P.ylen / ny_param);
+  H.zblocal_max = H.zbound + ((Real)(nz_local_start + H.nz - 2 * H.n_ghost)) * (P.zlen / nz_param);
 
 #endif
 
   /*perform 1-D first*/
-  if(H.nx > 1 && H.ny==1 && H.nz==1)
-  {
+  if (H.nx > 1 && H.ny == 1 && H.nz == 1) {
     H.dx = P.xlen / nx_param;
     H.dy = P.ylen;
     H.dz = P.zlen;
   }
 
   /*perform 2-D next*/
-  if(H.nx > 1 && H.ny>1 && H.nz==1)
-  {
+  if (H.nx > 1 && H.ny > 1 && H.nz == 1) {
     H.dx = P.xlen / nx_param;
     H.dy = P.ylen / ny_param;
     H.dz = P.zlen;
   }
 
   /*perform 3-D last*/
-  if(H.nx>1 && H.ny>1 && H.nz>1)
-  {
+  if (H.nx > 1 && H.ny > 1 && H.nz > 1) {
     H.dx = P.xlen / nx_param;
     H.dy = P.ylen / ny_param;
     H.dz = P.zlen / nz_param;
-
   }
 }
 
-
-
-/*! \fn void Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real By, Real Bz)
- *  \brief Constant gas properties. */
-void Grid3D::Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real By, Real Bz)
+/*! \fn void Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real
+ * By, Real Bz) \brief Constant gas properties. */
+void Grid3D::Constant(Parameters const &P)
 {
   int i, j, k, id;
   int istart, jstart, kstart, iend, jend, kend;
@@ -186,297 +186,404 @@ void Grid3D::Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real
   Real n, T;
 
   istart = H.n_ghost;
-  iend   = H.nx-H.n_ghost;
+  iend   = H.nx - H.n_ghost;
   if (H.ny > 1) {
     jstart = H.n_ghost;
-    jend   = H.ny-H.n_ghost;
-  }
-  else {
+    jend   = H.ny - H.n_ghost;
+  } else {
     jstart = 0;
     jend   = H.ny;
   }
   if (H.nz > 1) {
     kstart = H.n_ghost;
-    kend   = H.nz-H.n_ghost;
-  }
-  else {
+    kend   = H.nz - H.n_ghost;
+  } else {
     kstart = 0;
     kend   = H.nz;
   }
 
   // set initial values of conserved variables
-  for(k=kstart-1; k<kend; k++) {
-    for(j=jstart-1; j<jend; j++) {
-      for(i=istart-1; i<iend; i++) {
-
-        //get cell index
-        id = i + j*H.nx + k*H.nx*H.ny;
-
-        // Set the magnetic field including the rightmost ghost cell on the
-        // left side which is really the left face of the first grid cell
-        #ifdef  MHD
-          C.magnetic_x[id] = Bx;
-          C.magnetic_y[id] = By;
-          C.magnetic_z[id] = Bz;
-        #endif  // MHD
+  for (k = kstart - 1; k < kend; k++) {
+    for (j = jstart - 1; j < jend; j++) {
+      for (i = istart - 1; i < iend; i++) {
+        // get cell index
+        id = i + j * H.nx + k * H.nx * H.ny;
+
+// Set the magnetic field including the rightmost ghost cell on the
+// left side which is really the left face of the first grid cell
+#ifdef MHD
+        C.magnetic_x[id] = P.Bx;
+        C.magnetic_y[id] = P.By;
+        C.magnetic_z[id] = P.Bz;
+#endif  // MHD
 
         // Exclude the rightmost ghost cell on the "left" side
-        if ((k >= kstart) and (j >= jstart) and (i >= istart))
-        {
+        if ((k >= kstart) and (j >= jstart) and (i >= istart)) {
           // set constant initial states
-          C.density[id]    = rho;
-          C.momentum_x[id] = rho*vx;
-          C.momentum_y[id] = rho*vy;
-          C.momentum_z[id] = rho*vz;
-          C.Energy[id]     = P/(gama-1.0) + 0.5*rho*(vx*vx + vy*vy + vz*vz);
-          #ifdef DE
-          C.GasEnergy[id]  = P/(gama-1.0);
-          #endif  // DE
+          C.density[id]    = P.rho;
+          C.momentum_x[id] = P.rho * P.vx;
+          C.momentum_y[id] = P.rho * P.vy;
+          C.momentum_z[id] = P.rho * P.vz;
+          C.Energy[id]     = P.P / (gama - 1.0) + 0.5 * P.rho * (P.vx * P.vx + P.vy * P.vy + P.vz * P.vz);
+#ifdef DE
+          C.GasEnergy[id] = P.P / (gama - 1.0);
+#endif  // DE
         }
-/*
-        if (i==istart && j==jstart && k==kstart) {
-          n = rho*DENSITY_UNIT / (mu*MP);
-          T = P*PRESSURE_UNIT / (n*KB);
+        if (i == istart && j == jstart && k == kstart) {
+          n = P.rho * DENSITY_UNIT / (mu * MP);
+          T = P.P * PRESSURE_UNIT / (n * KB);
           printf("Initial n = %e, T = %e\n", n, T);
         }
-*/
       }
     }
   }
-
 }
 
-
 /*! \fn void Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A)
  *  \brief Sine wave perturbation. */
-void Grid3D::Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A)
+void Grid3D::Sound_Wave(Parameters const &P)
 {
   int i, j, k, id;
   int istart, jstart, kstart, iend, jend, kend;
   Real x_pos, y_pos, z_pos;
 
   istart = H.n_ghost;
-  iend   = H.nx-H.n_ghost;
+  iend   = H.nx - H.n_ghost;
   if (H.ny > 1) {
     jstart = H.n_ghost;
-    jend   = H.ny-H.n_ghost;
-  }
-  else {
+    jend   = H.ny - H.n_ghost;
+  } else {
     jstart = 0;
     jend   = H.ny;
   }
   if (H.nz > 1) {
     kstart = H.n_ghost;
-    kend   = H.nz-H.n_ghost;
-  }
-  else {
+    kend   = H.nz - H.n_ghost;
+  } else {
     kstart = 0;
     kend   = H.nz;
   }
 
   // set initial values of conserved variables
-  for(k=kstart; k<kend; k++) {
-    for(j=jstart; j<jend; j++) {
-      for(i=istart; i<iend; i++) {
-
-        //get cell index
-        id = i + j*H.nx + k*H.nx*H.ny;
+  for (k = kstart; k < kend; k++) {
+    for (j = jstart; j < jend; j++) {
+      for (i = istart; i < iend; i++) {
+        // get cell index
+        id = i + j * H.nx + k * H.nx * H.ny;
 
         // get cell-centered position
         Get_Position(i, j, k, &x_pos, &y_pos, &z_pos);
 
         // set constant initial states
-        C.density[id]    = rho;
-        C.momentum_x[id] = rho*vx;
-        C.momentum_y[id] = rho*vy;
-        C.momentum_z[id] = rho*vz;
-        C.Energy[id]     = P/(gama-1.0) + 0.5*rho*(vx*vx + vy*vy + vz*vz);
+        C.density[id]    = P.rho;
+        C.momentum_x[id] = P.rho * P.vx;
+        C.momentum_y[id] = P.rho * P.vy;
+        C.momentum_z[id] = P.rho * P.vz;
+        C.Energy[id]     = P.P / (gama - 1.0) + 0.5 * P.rho * (P.vx * P.vx + P.vy * P.vy + P.vz * P.vz);
         // add small-amplitude perturbations
-        C.density[id]    = C.density[id]    + A * sin(2.0*PI*x_pos);
-        C.momentum_x[id] = C.momentum_x[id] + A * sin(2.0*PI*x_pos);
-        C.momentum_y[id] = C.momentum_y[id] + A * sin(2.0*PI*x_pos);
-        C.momentum_z[id] = C.momentum_z[id] + A * sin(2.0*PI*x_pos);
-        C.Energy[id]     = C.Energy[id]     + A * (1.5) * sin(2*PI*x_pos);
-        #ifdef DE
-	C.GasEnergy[id]  = P/(gama-1.0);
-        #endif  //DE
+        C.density[id]    = C.density[id] + P.A * sin(2.0 * M_PI * x_pos);
+        C.momentum_x[id] = C.momentum_x[id] + P.A * sin(2.0 * M_PI * x_pos);
+        C.momentum_y[id] = C.momentum_y[id] + P.A * sin(2.0 * M_PI * x_pos);
+        C.momentum_z[id] = C.momentum_z[id] + P.A * sin(2.0 * M_PI * x_pos);
+        C.Energy[id]     = C.Energy[id] + P.A * (1.5) * sin(2 * M_PI * x_pos);
+#ifdef DE
+        C.GasEnergy[id] = P.P / (gama - 1.0);
+#endif  // DE
+#ifdef DE
+        C.GasEnergy[id] = P.P / (gama - 1.0);
+#endif  // DE
       }
     }
   }
-
 }
 
+/*! \fn void Linear_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A)
+ *  \brief Sine wave perturbation. */
+void Grid3D::Linear_Wave(Parameters const &P)
+{
+  // Compute any test parameters needed
+  // ==================================
+  // Angles
+  Real const sin_yaw   = std::sin(P.yaw);
+  Real const cos_yaw   = std::cos(P.yaw);
+  Real const sin_pitch = std::sin(P.pitch);
+  Real const cos_pitch = std::cos(P.pitch);
+
+  Real const wavenumber = 2.0 * M_PI / P.wave_length;  // the angular wave number k
+
+#ifdef MHD
+  // TODO: This method of setting the magnetic fields via the vector potential should work but instead leads to small
+  // TODO: errors in the magnetic field that tend to amplify over time until the solution diverges. I don't know why
+  // TODO: that is the case and can't figure out the reason. Without this we can't run linear waves at an angle to the
+  // TODO: grid.
+  // // Compute the vector potential
+  // // ============================
+  // std::vector<Real> vectorPotential(3 * H.n_cells, 0);
+
+  // // lambda function for computing the vector potential
+  // auto Compute_Vector_Potential = [&](Real const &x_loc, Real const &y_loc, Real const &z_loc) {
+  //   // The "_rot" variables are the rotated version
+  //   Real const x_rot = x_loc * cos_pitch * cos_yaw + y_loc * cos_pitch * sin_yaw + z_loc * sin_pitch;
+  //   Real const y_rot = -x_loc * sin_yaw + y_loc * cos_yaw;
+
+  //   Real const a_y = P.Bz * x_rot - (P.A * P.rEigenVec_Bz / wavenumber) * std::cos(wavenumber * x_rot);
+  //   Real const a_z = -P.By * x_rot + (P.A * P.rEigenVec_By / wavenumber) * std::cos(wavenumber * x_rot) + P.Bx *
+  //   y_rot;
+
+  //   return std::make_pair(a_y, a_z);
+  // };
+
+  // for (size_t k = 0; k < H.nz; k++) {
+  //   for (size_t j = 0; j < H.ny; j++) {
+  //     for (size_t i = 0; i < H.nx; i++) {
+  //       // Get cell index
+  //       size_t const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny);
+
+  //       Real x, y, z;
+  //       Get_Position(i, j, k, &x, &y, &z);
+
+  //       auto vectorPot                         = Compute_Vector_Potential(x, y + H.dy / 2., z + H.dz / 2.);
+  //       vectorPotential.at(id + 0 * H.n_cells) = -vectorPot.first * sin_yaw - vectorPot.second * sin_pitch * cos_yaw;
+
+  //       vectorPot                              = Compute_Vector_Potential(x + H.dx / 2., y, z + H.dz / 2.);
+  //       vectorPotential.at(id + 1 * H.n_cells) = vectorPot.first * cos_yaw - vectorPot.second * sin_pitch * sin_yaw;
+
+  //       vectorPot                              = Compute_Vector_Potential(x + H.dx / 2., y + H.dy / 2., z);
+  //       vectorPotential.at(id + 2 * H.n_cells) = vectorPot.second * cos_pitch;
+  //     }
+  //   }
+  // }
+
+  // // Compute the magnetic field from the vector potential
+  // // ====================================================
+  // mhd::utils::Init_Magnetic_Field_With_Vector_Potential(H, C, vectorPotential);
+
+  Real shift = H.dx;
+  size_t dir = 0;
+  if (sin_yaw == 1.0) {
+    shift = H.dy;
+    dir   = 1;
+  } else if (sin_pitch == 1.0) {
+    shift = H.dz;
+    dir   = 2;
+  }
+
+  // set initial values of conserved variables
+  for (int k = H.n_ghost; k < H.nz - H.n_ghost; k++) {
+    for (int j = H.n_ghost; j < H.ny - H.n_ghost; j++) {
+      for (int i = H.n_ghost; i < H.nx - H.n_ghost; i++) {
+        // get cell index
+        size_t const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny);
+
+        // get cell-centered position
+        Real x_pos, y_pos, z_pos;
+        Get_Position(i, j, k, &x_pos, &y_pos, &z_pos);
+        Real const x_pos_rot = cos_pitch * (x_pos * cos_yaw + y_pos * sin_yaw) + z_pos * sin_pitch;
+
+        Real const sine_x = std::sin(x_pos_rot * wavenumber);
+
+        Real bx = P.Bx + P.A * P.rEigenVec_Bx * sine_x;
+        Real by = P.By + P.A * P.rEigenVec_By * sine_x;
+        Real bz = P.Bz + P.A * P.rEigenVec_Bz * sine_x;
+
+        C.magnetic_x[id] = bx * cos_pitch * cos_yaw - by * sin_yaw - bz * sin_pitch * cos_yaw;
+        C.magnetic_y[id] = bx * cos_pitch * sin_yaw + by * cos_yaw - bz * sin_pitch * sin_yaw;
+        C.magnetic_z[id] = bx * sin_pitch + bz * cos_pitch;
+      }
+    }
+  }
+#endif  // MHD
+
+  // Compute the hydro variables
+  // ===========================
+  for (size_t k = H.n_ghost - 1; k < H.nz - H.n_ghost; k++) {
+    for (size_t j = H.n_ghost - 1; j < H.ny - H.n_ghost; j++) {
+      for (size_t i = H.n_ghost - 1; i < H.nx - H.n_ghost; i++) {
+        // get cell index
+        size_t const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny);
+
+        // get cell-centered position
+        Real x_pos, y_pos, z_pos;
+        Get_Position(i, j, k, &x_pos, &y_pos, &z_pos);
+        Real const x_pos_rot = cos_pitch * (x_pos * cos_yaw + y_pos * sin_yaw) + z_pos * sin_pitch;
+
+        Real const sine_x = std::sin(x_pos_rot * wavenumber);
+
+        // Density
+        C.density[id] = P.rho + P.A * P.rEigenVec_rho * sine_x;
+
+        // Momenta
+        Real mx = P.rho * P.vx + P.A * P.rEigenVec_MomentumX * sine_x;
+        Real my = P.A * P.rEigenVec_MomentumY * sine_x;
+        Real mz = P.A * P.rEigenVec_MomentumZ * sine_x;
+
+        C.momentum_x[id] = mx * cos_pitch * cos_yaw - my * sin_yaw - mz * sin_pitch * cos_yaw;
+        C.momentum_y[id] = mx * cos_pitch * sin_yaw + my * cos_yaw - mz * sin_pitch * sin_yaw;
+        C.momentum_z[id] = mx * sin_pitch + mz * cos_pitch;
+
+        // Energy
+        C.Energy[id] = P.P / (P.gamma - 1.0) + 0.5 * P.rho * P.vx * P.vx + P.A * sine_x * P.rEigenVec_E;
+#ifdef MHD
+        C.Energy[id] += 0.5 * (P.Bx * P.Bx + P.By * P.By + P.Bz * P.Bz);
+#endif  // MHD
+      }
+    }
+  }
+}
 
 /*! \fn void Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A)
- *  \brief Square wave density perturbation with amplitude A*rho in pressure equilibrium. */
-void Grid3D::Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A)
+ *  \brief Square wave density perturbation with amplitude A*rho in pressure
+ * equilibrium. */
+void Grid3D::Square_Wave(Parameters const &P)
 {
   int i, j, k, id;
   int istart, jstart, kstart, iend, jend, kend;
   Real x_pos, y_pos, z_pos;
 
   istart = H.n_ghost;
-  iend   = H.nx-H.n_ghost;
+  iend   = H.nx - H.n_ghost;
   if (H.ny > 1) {
     jstart = H.n_ghost;
-    jend   = H.ny-H.n_ghost;
-  }
-  else {
+    jend   = H.ny - H.n_ghost;
+  } else {
     jstart = 0;
     jend   = H.ny;
   }
   if (H.nz > 1) {
     kstart = H.n_ghost;
-    kend   = H.nz-H.n_ghost;
-  }
-  else {
+    kend   = H.nz - H.n_ghost;
+  } else {
     kstart = 0;
     kend   = H.nz;
   }
 
   // set initial values of conserved variables
-  for(k=kstart; k<kend; k++) {
-    for(j=jstart; j<jend; j++) {
-      for(i=istart; i<iend; i++) {
-
-        //get cell index
-        id = i + j*H.nx + k*H.nx*H.ny;
+  for (k = kstart; k < kend; k++) {
+    for (j = jstart; j < jend; j++) {
+      for (i = istart; i < iend; i++) {
+        // get cell index
+        id = i + j * H.nx + k * H.nx * H.ny;
 
         // get cell-centered position
         Get_Position(i, j, k, &x_pos, &y_pos, &z_pos);
 
-        C.density[id]    = rho;
-        //C.momentum_x[id] = 0.0;
-        C.momentum_x[id] = rho * vx;
-        C.momentum_y[id] = rho * vy;
-        C.momentum_z[id] = rho * vz;
-        //C.momentum_z[id] = rho_l * v_l;
-        C.Energy[id]     = P/(gama-1.0) + 0.5*rho*(vx*vx + vy*vy + vz*vz);
-        #ifdef DE
-        C.GasEnergy[id]  = P/(gama-1.0);
-        #endif
-        #ifdef SCALAR
-        C.scalar[id] = C.density[id]*0.0;
-        #endif
-        if (x_pos > 0.25*H.xdglobal && x_pos < 0.75*H.xdglobal)
-        {
-          C.density[id]    = rho*A;
-          C.momentum_x[id] = rho*A * vx;
-          C.momentum_y[id] = rho*A * vy;
-          C.momentum_z[id] = rho*A * vz;
-          C.Energy[id]     = P/(gama-1.0) + 0.5*rho*A*(vx*vx + vy*vy + vz*vz);
-          #ifdef DE
-          C.GasEnergy[id]  = P/(gama-1.0);
-          #endif
-          #ifdef SCALAR
-          C.scalar[id] = C.density[id]*1.0;
-          #endif
+        C.density[id] = P.rho;
+        // C.momentum_x[id] = 0.0;
+        C.momentum_x[id] = P.rho * P.vx;
+        C.momentum_y[id] = P.rho * P.vy;
+        C.momentum_z[id] = P.rho * P.vz;
+        // C.momentum_z[id] = rho_l * v_l;
+        C.Energy[id] = P.P / (gama - 1.0) + 0.5 * P.rho * (P.vx * P.vx + P.vy * P.vy + P.vz * P.vz);
+#ifdef DE
+        C.GasEnergy[id] = P.P / (gama - 1.0);
+#endif
+#ifdef SCALAR
+  #ifdef BASIC_SCALAR
+        C.basic_scalar[id] = C.density[id] * 0.0;
+  #endif
+#endif
+        if (x_pos > 0.25 * H.xdglobal && x_pos < 0.75 * H.xdglobal) {
+          C.density[id]    = P.rho * P.A;
+          C.momentum_x[id] = P.rho * P.A * P.vx;
+          C.momentum_y[id] = P.rho * P.A * P.vy;
+          C.momentum_z[id] = P.rho * P.A * P.vz;
+          C.Energy[id]     = P.P / (gama - 1.0) + 0.5 * P.rho * P.A * (P.vx * P.vx + P.vy * P.vy + P.vz * P.vz);
+#ifdef DE
+          C.GasEnergy[id] = P.P / (gama - 1.0);
+#endif
+#ifdef SCALAR
+  #ifdef BASIC_SCALAR
+          C.basic_scalar[id] = C.density[id] * 1.0;
+  #endif
+#endif
         }
       }
     }
   }
 }
 
-
-/*! \fn void Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real Bx_l, Real By_l, Real Bz_l,
-                     Real rho_r, Real vx_r, Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r,
-                     Real diaph)
+/*! \fn void Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real
+ Bx_l, Real By_l, Real Bz_l, Real rho_r, Real vx_r, Real vy_r, Real vz_r, Real
+ P_r, Real Bx_r, Real By_r, Real Bz_r, Real diaph)
  *  \brief Initialize the grid with a Riemann problem. */
-void Grid3D::Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real Bx_l, Real By_l, Real Bz_l,
-                     Real rho_r, Real vx_r, Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r,
-                     Real diaph)
+void Grid3D::Riemann(Parameters const &P)
 {
-  int i, j, k, id;
-  int istart, jstart, kstart, iend, jend, kend;
-  Real x_pos, y_pos, z_pos;
-  Real v, P, cs;
-
-  istart = H.n_ghost;
-  iend   = H.nx-H.n_ghost;
+  size_t const istart = H.n_ghost - 1;
+  size_t const iend   = H.nx - H.n_ghost;
+  size_t jstart, kstart, jend, kend;
   if (H.ny > 1) {
-    jstart = H.n_ghost;
-    jend   = H.ny-H.n_ghost;
-  }
-  else {
+    jstart = H.n_ghost - 1;
+    jend   = H.ny - H.n_ghost;
+  } else {
     jstart = 0;
     jend   = H.ny;
   }
   if (H.nz > 1) {
-    kstart = H.n_ghost;
-    kend   = H.nz-H.n_ghost;
-  }
-  else {
+    kstart = H.n_ghost - 1;
+    kend   = H.nz - H.n_ghost;
+  } else {
     kstart = 0;
     kend   = H.nz;
   }
 
-  #ifdef MHD
-    auto setMagnetFields = [&] ()
-    {
-      Real x_pos_face = x_pos + 0.5 * H.dx;
-
-      if (x_pos_face < diaph)
-      {
-        C.magnetic_x[id] = Bx_l;
-        C.magnetic_y[id] = By_l;
-        C.magnetic_z[id] = Bz_l;
-      }
-      else
-      {
-        C.magnetic_x[id] = Bx_r;
-        C.magnetic_y[id] = By_r;
-        C.magnetic_z[id] = Bz_r;
-      }
-    };
-  #endif  // MHD
-
   // set initial values of conserved variables
-  for(k=kstart-1; k<kend; k++) {
-    for(j=jstart-1; j<jend; j++) {
-      for(i=istart-1; i<iend; i++) {
-
-        //get cell index
-        id = i + j*H.nx + k*H.nx*H.ny;
+  for (size_t k = kstart; k < kend; k++) {
+    for (size_t j = jstart; j < jend; j++) {
+      for (size_t i = istart; i < iend; i++) {
+        // get cell index
+        size_t const id = i + j * H.nx + k * H.nx * H.ny;
 
         // get cell-centered position
+        Real x_pos, y_pos, z_pos;
         Get_Position(i, j, k, &x_pos, &y_pos, &z_pos);
 
-        #ifdef  MHD
-          // Set the magnetic field including the rightmost ghost cell on the
-          // left side which is really the left face of the first grid cell
-          setMagnetFields();
-        #endif  //MHD
+#ifdef MHD
+        // Set the magnetic field including the rightmost ghost cell on the
+        // left side which is really the left face of the first grid cell
+        // WARNING: Only correct in 3-D
+        if (x_pos < P.diaph) {
+          C.magnetic_x[id] = P.Bx_l;
+          C.magnetic_y[id] = P.By_l;
+          C.magnetic_z[id] = P.Bz_l;
+        } else {
+          C.magnetic_x[id] = P.Bx_r;
+          C.magnetic_y[id] = P.By_r;
+          C.magnetic_z[id] = P.Bz_r;
+        }
+#endif  // MHD
 
         // Exclude the rightmost ghost cell on the "left" side
-        if ((k >= kstart) and (j >= jstart) and (i >= istart))
-        {
-          if (x_pos < diaph)
-          {
-            C.density[id]    = rho_l;
-            C.momentum_x[id] = rho_l * vx_l;
-            C.momentum_y[id] = rho_l * vy_l;
-            C.momentum_z[id] = rho_l * vz_l;
-            C.Energy[id]     = P_l/(gama-1.0) + 0.5*rho_l*(vx_l*vx_l + vy_l*vy_l + vz_l*vz_l);
-            #ifdef SCALAR
-            C.scalar[id] = 1.0*rho_l;
-            #endif  //SCALAR
-            #ifdef DE
-            C.GasEnergy[id]  = P_l/(gama-1.0);
-            #endif  //DE
-          }
-          else
-          {
-            C.density[id]    = rho_r;
-            C.momentum_x[id] = rho_r * vx_r;
-            C.momentum_y[id] = rho_r * vy_r;
-            C.momentum_z[id] = rho_r * vz_r;
-            C.Energy[id]     = P_r/(gama-1.0) + 0.5*rho_r*(vx_r*vx_r + vy_r*vy_r + vz_r*vz_r);
-            #ifdef SCALAR
-            C.scalar[id] = 0.0*rho_r;
-            #endif  //SCALAR
-            #ifdef DE
-            C.GasEnergy[id]  = P_r/(gama-1.0);
-            #endif  //DE
+        if ((k >= kstart) and (j >= jstart) and (i >= istart)) {
+          if (x_pos < P.diaph) {
+            C.density[id]    = P.rho_l;
+            C.momentum_x[id] = P.rho_l * P.vx_l;
+            C.momentum_y[id] = P.rho_l * P.vy_l;
+            C.momentum_z[id] = P.rho_l * P.vz_l;
+            C.Energy[id] = hydro_utilities::Calc_Energy_Primitive(P.P_l, P.rho_l, P.vx_l, P.vy_l, P.vz_l, gama, P.Bx_l,
+                                                                  P.By_l, P.Bz_l);
+#ifdef SCALAR
+  #ifdef BASIC_SCALAR
+            C.basic_scalar[id] = 1.0 * P.rho_l;
+  #endif
+#endif  // SCALAR
+#ifdef DE
+            C.GasEnergy[id] = P.P_l / (gama - 1.0);
+#endif  // DE
+          } else {
+            C.density[id]    = P.rho_r;
+            C.momentum_x[id] = P.rho_r * P.vx_r;
+            C.momentum_y[id] = P.rho_r * P.vy_r;
+            C.momentum_z[id] = P.rho_r * P.vz_r;
+            C.Energy[id] = hydro_utilities::Calc_Energy_Primitive(P.P_r, P.rho_r, P.vx_r, P.vy_r, P.vz_r, gama, P.Bx_r,
+                                                                  P.By_r, P.Bz_r);
+#ifdef SCALAR
+  #ifdef BASIC_SCALAR
+            C.basic_scalar[id] = 0.0 * P.rho_r;
+  #endif
+#endif  // SCALAR
+#ifdef DE
+            C.GasEnergy[id] = P.P_r / (gama - 1.0);
+#endif  // DE
           }
         }
       }
@@ -484,9 +591,9 @@ void Grid3D::Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real
   }
 }
 
-
 /*! \fn void Shu_Osher()
- *  \brief Initialize the grid with the Shu-Osher shock tube problem. See Stone 2008, Section 8.1 */
+ *  \brief Initialize the grid with the Shu-Osher shock tube problem. See Stone
+ * 2008, Section 8.1 */
 void Grid3D::Shu_Osher()
 {
   int i, id;
@@ -494,42 +601,37 @@ void Grid3D::Shu_Osher()
   Real vx, P;
 
   // set initial values of conserved variables
-  for (i=H.n_ghost; i<H.nx-H.n_ghost; i++) {
+  for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) {
     id = i;
     // get centered x position
     Get_Position(i, H.n_ghost, H.n_ghost, &x_pos, &y_pos, &z_pos);
 
-    if (x_pos < -0.8)
-    {
-      C.density[id] = 3.857143;
-      vx = 2.629369;
-      C.momentum_x[id] = C.density[id]*vx;
+    if (x_pos < -0.8) {
+      C.density[id]    = 3.857143;
+      vx               = 2.629369;
+      C.momentum_x[id] = C.density[id] * vx;
       C.momentum_y[id] = 0.0;
       C.momentum_z[id] = 0.0;
-      P = 10.33333;
-      C.Energy[id] = P/(gama-1.0) + 0.5*C.density[id]*vx*vx;
-    }
-    else
-    {
-      C.density[id] = 1.0 + 0.2*sin(5.0*PI*x_pos);
-      Real vx = 0.0;
-      C.momentum_x[id] = C.density[id]*vx;
+      P                = 10.33333;
+      C.Energy[id]     = P / (gama - 1.0) + 0.5 * C.density[id] * vx * vx;
+    } else {
+      C.density[id]    = 1.0 + 0.2 * sin(5.0 * M_PI * x_pos);
+      Real vx          = 0.0;
+      C.momentum_x[id] = C.density[id] * vx;
       C.momentum_y[id] = 0.0;
       C.momentum_z[id] = 0.0;
-      Real P = 1.0;
-      C.Energy[id] = P/(gama-1.0) + 0.5*C.density[id]*vx*vx;
+      Real P           = 1.0;
+      C.Energy[id]     = P / (gama - 1.0) + 0.5 * C.density[id] * vx * vx;
     }
-    #ifdef DE
-    C.GasEnergy[id]  = P/(gama-1.0);
-    #endif  //DE
-
-
+#ifdef DE
+    C.GasEnergy[id] = P / (gama - 1.0);
+#endif  // DE
   }
 }
 
-
 /*! \fn void Blast_1D()
- *  \brief Initialize the grid with two interacting blast waves. See Stone 2008, Section 8.1.*/
+ *  \brief Initialize the grid with two interacting blast waves. See Stone 2008,
+ * Section 8.1.*/
 void Grid3D::Blast_1D()
 {
   int i, id;
@@ -537,44 +639,37 @@ void Grid3D::Blast_1D()
   Real vx, P;
 
   // set initial values of conserved variables
-  for (i=H.n_ghost; i<H.nx-H.n_ghost; i++) {
+  for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) {
     id = i;
     // get the centered x position
     Get_Position(i, H.n_ghost, H.n_ghost, &x_pos, &y_pos, &z_pos);
 
-    if (x_pos < 0.1)
-    {
-      C.density[id] = 1.0;
+    if (x_pos < 0.1) {
+      C.density[id]    = 1.0;
       C.momentum_x[id] = 0.0;
       C.momentum_y[id] = 0.0;
       C.momentum_z[id] = 0.0;
-      P = 1000.0;
-    }
-    else if (x_pos > 0.9)
-    {
-      C.density[id] = 1.0;
+      P                = 1000.0;
+    } else if (x_pos > 0.9) {
+      C.density[id]    = 1.0;
       C.momentum_x[id] = 0.0;
       C.momentum_y[id] = 0.0;
       C.momentum_z[id] = 0.0;
-      P = 100;
-    }
-    else
-    {
-      C.density[id] = 1.0;
+      P                = 100;
+    } else {
+      C.density[id]    = 1.0;
       C.momentum_x[id] = 0.0;
       C.momentum_y[id] = 0.0;
       C.momentum_z[id] = 0.0;
-      P = 0.01;
+      P                = 0.01;
     }
-    C.Energy[id] = P/(gama-1.0);
-    #ifdef DE
-    C.GasEnergy[id]  = P/(gama-1.0);
-    #endif  //DE
-    
+    C.Energy[id] = P / (gama - 1.0);
+#ifdef DE
+    C.GasEnergy[id] = P / (gama - 1.0);
+#endif  // DE
   }
 }
 
-
 /*! \fn void KH()
  *  \brief Initialize the grid with a Kelvin-Helmholtz instability.
            This version of KH test has a discontinuous boundary.
@@ -591,79 +686,66 @@ void Grid3D::KH()
   d2 = 1.0;
   v1 = 0.5;
   v2 = -0.5;
-  P = 2.5;
-  A = 0.1;
+  P  = 2.5;
+  A  = 0.1;
 
   istart = H.n_ghost;
-  iend   = H.nx-H.n_ghost;
+  iend   = H.nx - H.n_ghost;
   jstart = H.n_ghost;
-  jend   = H.ny-H.n_ghost;
+  jend   = H.ny - H.n_ghost;
   if (H.nz > 1) {
     kstart = H.n_ghost;
-    kend   = H.nz-H.n_ghost;
-  }
-  else {
+    kend   = H.nz - H.n_ghost;
+  } else {
     kstart = 0;
     kend   = H.nz;
   }
 
   // set the initial values of the conserved variables
-  for (k=kstart; k<kend; k++) {
-    for (j=jstart; j<jend; j++) {
-      for (i=istart; i<iend; i++) {
-        id = i + j*H.nx + k*H.nx*H.ny;
+  for (k = kstart; k < kend; k++) {
+    for (j = jstart; j < jend; j++) {
+      for (i = istart; i < iend; i++) {
+        id = i + j * H.nx + k * H.nx * H.ny;
         // get the centered x and y positions
         Get_Position(i, j, H.n_ghost, &x_pos, &y_pos, &z_pos);
 
         // outer quarters of slab
-        if (y_pos <= 1.0*H.ydglobal/4.0)
-        {
-          C.density[id] = d2;
-          C.momentum_x[id] = v2*C.density[id];
-          C.momentum_y[id] = C.density[id]*A*sin(4*PI*x_pos);
+        if ((y_pos <= 1.0 * H.ydglobal / 4.0) or (y_pos >= 3.0 * H.ydglobal / 4.0)) {
+          C.density[id]    = d2;
+          C.momentum_x[id] = v2 * C.density[id];
+          C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos);
           C.momentum_z[id] = 0.0;
-          #ifdef SCALAR
-          C.scalar[id] = 0.0;
-          #endif
-        }
-        else if (y_pos >= 3.0*H.ydglobal/4.0)
-        {
-          C.density[id] = d2;
-          C.momentum_x[id] = v2*C.density[id];
-          C.momentum_y[id] = C.density[id]*A*sin(4*PI*x_pos);
-          C.momentum_z[id] = 0.0;
-
-          #ifdef SCALAR
-          C.scalar[id] = 0.0;
-          #endif
-        }
-        // inner half of slab
-        else
-        {
-          C.density[id] = d1;
-          C.momentum_x[id] = v1*C.density[id];
-          C.momentum_y[id] = C.density[id]*A*sin(4*PI*x_pos);
+#ifdef SCALAR
+  #ifdef BASIC_SCALAR
+          C.basic_scalar[id] = 0.0;
+  #endif
+#endif
+          // inner half of slab
+        } else {
+          C.density[id]    = d1;
+          C.momentum_x[id] = v1 * C.density[id];
+          C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos);
           C.momentum_z[id] = 0.0;
-       
-          #ifdef SCALAR
-          C.scalar[id] = 1.0*d1;
-          #endif
+#ifdef SCALAR
+  #ifdef BASIC_SCALAR
+          C.basic_scalar[id] = 1.0 * d1;
+  #endif
+#endif
         }
-	C.Energy[id] = P/(gama-1.0) + 0.5*(C.momentum_x[id]*C.momentum_x[id] + C.momentum_y[id]*C.momentum_y[id])/C.density[id];
-        #ifdef DE
-        C.GasEnergy[id]  = P/(gama-1.0);
-        #endif  //DE
-
-
+        C.Energy[id] =
+            P / (gama - 1.0) +
+            0.5 * (C.momentum_x[id] * C.momentum_x[id] + C.momentum_y[id] * C.momentum_y[id]) / C.density[id];
+#ifdef DE
+        C.GasEnergy[id] = P / (gama - 1.0);
+#endif  // DE
       }
     }
   }
-
 }
 
-
 /*! \fn void KH_res_ind()
- *  \brief Initialize the grid with a Kelvin-Helmholtz instability whose modes are resolution independent. */
+ *  \brief Initialize the grid with a Kelvin-Helmholtz instability whose modes
+ * are resolution independent. */
 void Grid3D::KH_res_ind()
 {
   int i, j, k, id;
@@ -673,14 +755,13 @@ void Grid3D::KH_res_ind()
   Real r, yc, zc, phi;
   Real d1, d2, v1, v2, P, dy, A;
   istart = H.n_ghost;
-  iend   = H.nx-H.n_ghost;
+  iend   = H.nx - H.n_ghost;
   jstart = H.n_ghost;
-  jend   = H.ny-H.n_ghost;
+  jend   = H.ny - H.n_ghost;
   if (H.nz > 1) {
     kstart = H.n_ghost;
-    kend   = H.nz-H.n_ghost;
-  }
-  else {
+    kend   = H.nz - H.n_ghost;
+  } else {
     kstart = 0;
     kend   = H.nz;
   }
@@ -689,99 +770,114 @@ void Grid3D::KH_res_ind()
   yc = 0.0;
   zc = 0.0;
 
-  d1 = 100.0; // inner density
-  d2 = 1.0; // outer density
-  v1 = 10.5; // inner velocity
-  v2 = 9.5; // outer velocity
-  P = 2.5; // pressure
-  dy = 0.05; // width of ramp function (see Robertson 2009)
-  A = 0.1; // amplitude of the perturbation
+  d1 = 100.0;  // inner density
+  d2 = 1.0;    // outer density
+  v1 = 0.5;    // inner velocity
+  v2 = -0.5;   // outer velocity
+  P  = 2.5;    // pressure
+  dy = 0.05;   // width of ramp function (see Robertson 2009)
+  A  = 0.1;    // amplitude of the perturbation
 
-  // Note: ramp function from Robertson 2009 is 1/Ramp(y) = (1 + exp(2*(y-0.25)/dy))*(1 + exp(2*(0.75 - y)/dy));  
+  // Note: ramp function from Robertson 2009 is 1/Ramp(y) = (1 +
+  // exp(2*(y-0.25)/dy))*(1 + exp(2*(0.75 - y)/dy));
 
   // set the initial values of the conserved variables
-  for (k=kstart; k<kend; k++) {
-    for (j=jstart; j<jend; j++) {
-      for (i=istart; i<iend; i++) {
-        id = i + j*H.nx + k*H.nx*H.ny;
+  for (k = kstart; k < kend; k++) {
+    for (j = jstart; j < jend; j++) {
+      for (i = istart; i < iend; i++) {
+        id = i + j * H.nx + k * H.nx * H.ny;
         // get the centered x and y positions
         Get_Position(i, j, k, &x_pos, &y_pos, &z_pos);
-
-
-        // inner fluid
-        if (fabs(y_pos-0.5) < 0.25)
-        {
-          if (y_pos > 0.5)
-          {
-            C.density[id] = d1 - (d1-d2)*exp( -0.5*pow(y_pos-0.75 - sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) );
-            C.momentum_x[id] = v1*C.density[id] - C.density[id] * (v1-v2) * exp( -0.5*pow(y_pos-0.75 - sqrt(-2.0*dy*dy*log(0.5)),2) /(dy*dy) );
-            C.momentum_y[id] = C.density[id] * A*sin(4*PI*x_pos) * exp( -0.5*pow(y_pos-0.75 - sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ) ;
+        // 2D initial conditions:
+        if (H.nz == 1) {
+          // inner fluid
+          if (fabs(y_pos - 0.5) < 0.25) {
+            if (y_pos > 0.5) {
+              C.density[id] =
+                  d1 - (d1 - d2) * exp(-0.5 * pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy));
+              C.momentum_x[id] = v1 * C.density[id] -
+                                 C.density[id] * (v1 - v2) *
+                                     exp(-0.5 * pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy));
+              C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) *
+                                 exp(-0.5 * pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy));
+            } else {
+              C.density[id] =
+                  d1 - (d1 - d2) * exp(-0.5 * pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy));
+              C.momentum_x[id] = v1 * C.density[id] -
+                                 C.density[id] * (v1 - v2) *
+                                     exp(-0.5 * pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy));
+              C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) *
+                                 exp(-0.5 * pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy));
+            }
           }
-          else
-          {
-            C.density[id] = d1 - (d1-d2)*exp( -0.5*pow(y_pos-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) );
-            C.momentum_x[id] = v1*C.density[id] - C.density[id] * (v1 - v2) * exp( -0.5*pow(y_pos-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2) /(dy*dy) );
-            C.momentum_y[id] = C.density[id] * A*sin(4*PI*x_pos) * exp( -0.5*pow(y_pos-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) );
+          // outer fluid
+          else {
+            if (y_pos > 0.5) {
+              C.density[id] =
+                  d2 + (d1 - d2) * exp(-0.5 * pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy));
+              C.momentum_x[id] = v2 * C.density[id] +
+                                 C.density[id] * (v1 - v2) *
+                                     exp(-0.5 * pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy));
+              C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) *
+                                 exp(-0.5 * pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy));
+            } else {
+              C.density[id] =
+                  d2 + (d1 - d2) * exp(-0.5 * pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy));
+              C.momentum_x[id] = v2 * C.density[id] +
+                                 C.density[id] * (v1 - v2) *
+                                     exp(-0.5 * pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy));
+              C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) *
+                                 exp(-0.5 * pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy));
+            }
           }
-        }
-        // outer fluid
-        else
-        {
-          if (y_pos > 0.5)
+          // C.momentum_y[id] = C.density[id] * A*sin(4*PI*x_pos);
+          C.momentum_z[id] = 0.0;
+
+          // 3D initial conditions:
+        } else {
+          // cylindrical version (3D only)
+          r   = sqrt((z_pos - zc) * (z_pos - zc) + (y_pos - yc) * (y_pos - yc));  // center the cylinder at yc, zc
+          phi = atan2((z_pos - zc), (y_pos - yc));
+
+          if (r < 0.25)  // inside the cylinder
           {
-            C.density[id] = d2 + (d1-d2)*exp( -0.5*pow(y_pos-0.75 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) );
-            C.momentum_x[id] = v2*C.density[id] + C.density[id] * (v1 - v2) * exp( -0.5*pow(y_pos-0.75 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) );
-            C.momentum_y[id] = C.density[id] * A*sin(4*PI*x_pos) * exp( -0.5*pow(y_pos-0.75 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) );
-          }
-          else
+            C.density[id] = d1 - (d1 - d2) * exp(-0.5 * pow(r - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy));
+            C.momentum_x[id] =
+                v1 * C.density[id] -
+                C.density[id] * exp(-0.5 * pow(r - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy));
+            C.momentum_y[id] = cos(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) *
+                               exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy));
+            C.momentum_z[id] = sin(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) *
+                               exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy));
+          } else  // outside the cylinder
           {
-            C.density[id] = d2 + (d1-d2)*exp( -0.5*pow(y_pos-0.25 - sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) );
-            C.momentum_x[id] = v2*C.density[id] + C.density[id] * (v1 - v2) * exp( -0.5*pow(y_pos-0.25 - sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) );
-            C.momentum_y[id] = C.density[id] * A*sin(4*PI*x_pos) * exp( -0.5*pow(y_pos-0.25 - sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) );
+            C.density[id] = d2 + (d1 - d2) * exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy));
+            C.momentum_x[id] =
+                v2 * C.density[id] +
+                C.density[id] * exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy));
+            C.momentum_y[id] = cos(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) *
+                               (1.0 - exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)));
+            C.momentum_z[id] = sin(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) *
+                               (1.0 - exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)));
           }
-
         }
-        //C.momentum_y[id] = C.density[id] * A*sin(4*PI*x_pos);
-        C.momentum_z[id] = 0.0;
-
-        // cylindrical version (3D only)
-        r = sqrt((z_pos-zc)*(z_pos-zc) + (y_pos-yc)*(y_pos-yc)); // center the cylinder at yc, zc
-        phi = atan2((z_pos-zc), (y_pos-yc));
-
-        if (r < 0.25) // inside the cylinder
-        {
-          C.density[id] = d1 - (d1-d2)*exp( -0.5*pow(r-0.25 - sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) );
-          C.momentum_x[id] = v1*C.density[id] - C.density[id] * exp( -0.5*pow(r-0.25 - sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) );
-          C.momentum_y[id] = cos(phi) * C.density[id] * A*sin(4*PI*x_pos) * exp( -0.5*pow(r-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) );
-          C.momentum_z[id] = sin(phi) * C.density[id] * A*sin(4*PI*x_pos) * exp( -0.5*pow(r-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) );
-        }
-        else // outside the cylinder
-        {
-          C.density[id] = d2 + (d1-d2)*exp( -0.5*pow(r-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) );
-          C.momentum_x[id] = v2*C.density[id] + C.density[id] * exp( -0.5*pow(r-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) );
-          C.momentum_y[id] = cos(phi) * C.density[id] * A*sin(4*PI*x_pos) * (1.0 - exp( -0.5*pow(r-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ));
-          C.momentum_z[id] = sin(phi) * C.density[id] * A*sin(4*PI*x_pos) * (1.0 - exp( -0.5*pow(r-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ));
-        }
-	
-	// No matter what we do with the density and momentum, set the Energy and GasEnergy appropriately
-	mx = C.momentum_x[id];
-	my = C.momentum_y[id];
-	mz = C.momentum_z[id];
-	C.Energy[id] = P/(gama-1.0) + 0.5*(mx*mx + my*my + mz*mz)/C.density[id];	
-
-        #ifdef DE
-	C.GasEnergy[id]  = P/(gama-1.0);
-        #endif // DE
 
-      }// i loop
-    }// j loop
-  }//k loop
+        // No matter what we do with the density and momentum, set the Energy
+        // and GasEnergy appropriately
+        mx           = C.momentum_x[id];
+        my           = C.momentum_y[id];
+        mz           = C.momentum_z[id];
+        C.Energy[id] = P / (gama - 1.0) + 0.5 * (mx * mx + my * my + mz * mz) / C.density[id];
 
+#ifdef DE
+        C.GasEnergy[id] = P / (gama - 1.0);
+#endif  // DE
 
+      }  // i loop
+    }    // j loop
+  }      // k loop
 }
 
-
-
 /*! \fn void Rayleigh_Taylor()
  *  \brief Initialize the grid with a 2D Rayleigh-Taylor instability. */
 void Grid3D::Rayleigh_Taylor()
@@ -791,51 +887,46 @@ void Grid3D::Rayleigh_Taylor()
   Real dl, du, vy, g, P, P_0;
   dl = 1.0;
   du = 2.0;
-  g = -0.1;
+  g  = -0.1;
 
   // set the initial values of the conserved variables
-  for (j=H.n_ghost; j<H.ny-H.n_ghost; j++) {
-    for (i=H.n_ghost; i<H.nx-H.n_ghost; i++) {
-      id = i + j*H.nx;
+  for (j = H.n_ghost; j < H.ny - H.n_ghost; j++) {
+    for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) {
+      id = i + j * H.nx;
       // get the centered x and y positions
       Get_Position(i, j, H.n_ghost, &x_pos, &y_pos, &z_pos);
 
       // set the y velocities (small perturbation tapering off from center)
-      vy = 0.01*cos(6*PI*x_pos+PI)*exp(-(y_pos-0.5*H.ydglobal)*(y_pos-0.5*H.ydglobal)/0.1);
-      //vy = 0.0;
+      vy = 0.01 * cos(6 * M_PI * x_pos + M_PI) * exp(-(y_pos - 0.5 * H.ydglobal) * (y_pos - 0.5 * H.ydglobal) / 0.1);
+      // vy = 0.0;
 
       // lower half of slab
-      if (y_pos <= 0.5*H.ydglobal)
-      {
-        P_0 = 1.0/gama - dl*g*0.5;
-        P = P_0 + dl*g*y_pos;
-        C.density[id] = dl;
+      if (y_pos <= 0.5 * H.ydglobal) {
+        P_0              = 1.0 / gama - dl * g * 0.5;
+        P                = P_0 + dl * g * y_pos;
+        C.density[id]    = dl;
         C.momentum_x[id] = 0.0;
-        C.momentum_y[id] = dl*vy;
+        C.momentum_y[id] = dl * vy;
         C.momentum_z[id] = 0.0;
       }
       // upper half of slab
-      else
-      {
-        P_0 = 1.0/gama - du*g*0.5;
-        P = P_0 + du*g*y_pos;
-        C.density[id] = du;
+      else {
+        P_0              = 1.0 / gama - du * g * 0.5;
+        P                = P_0 + du * g * y_pos;
+        C.density[id]    = du;
         C.momentum_x[id] = 0.0;
-        C.momentum_y[id] = du*vy;
+        C.momentum_y[id] = du * vy;
         C.momentum_z[id] = 0.0;
       }
 
-      C.Energy[id] = P/(gama-1.0) + 0.5*(C.momentum_y[id]*C.momentum_y[id])/C.density[id];
-      #ifdef DE
-      C.GasEnergy[id]  = P/(gama-1.0);
-      #endif // DE
-
+      C.Energy[id] = P / (gama - 1.0) + 0.5 * (C.momentum_y[id] * C.momentum_y[id]) / C.density[id];
+#ifdef DE
+      C.GasEnergy[id] = P / (gama - 1.0);
+#endif  // DE
     }
   }
-
 }
 
-
 /*! \fn void Gresho()
  *  \brief Initialize the grid with the 2D Gresho problem described in LW03. */
 void Grid3D::Gresho()
@@ -845,8 +936,8 @@ void Grid3D::Gresho()
   Real d, vx, vy, P, v_boost;
   Real x, y, dx, dy;
   int ran, N;
-  N = 100000;
-  d = 1.0;
+  N       = 100000;
+  d       = 1.0;
   v_boost = 0.0;
 
   // center the vortex at (0.0,0.0)
@@ -857,92 +948,86 @@ void Grid3D::Gresho()
   srand(0);
 
   // set the initial values of the conserved variables
-  for (j=H.n_ghost; j<H.ny-H.n_ghost; j++) {
-    for (i=H.n_ghost; i<H.nx-H.n_ghost; i++) {
-      id = i + j*H.nx;
+  for (j = H.n_ghost; j < H.ny - H.n_ghost; j++) {
+    for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) {
+      id = i + j * H.nx;
       // get the centered x and y positions
       Get_Position(i, j, H.n_ghost, &x_pos, &y_pos, &z_pos);
 
       // calculate centered radial position and phi
-      r = sqrt((x_pos-xc)*(x_pos-xc) + (y_pos-yc)*(y_pos-yc));
-      phi = atan2((y_pos-yc), (x_pos-xc));
-
-/*
-      // set vx, vy, P to zero before integrating
-      vx = 0.0;
-      vy = 0.0;
-      P = 0.0;
-
-      // monte carlo sample to get an integrated value for vx, vy, P
-      for (int ii = 0; ii<N; ii++) {
-        // get a random dx and dy to sample within the cell
-        ran = rand() % 1000;
-        dx = H.dx*(ran/1000.0 - 0.5);
-        ran = rand() % 1000;
-        dy = H.dy*(ran/1000.0 - 0.5);
-        x = x_pos + dx;
-        y = y_pos + dy;
-        // calculate r and phi using the new x & y positions
-        r = sqrt((x-xc)*(x-xc) + (y-yc)*(y-yc));
-        phi = atan2((y-yc), (x-xc));
-        if (r < 0.2) {
-          vx += -sin(phi)*5.0*r + v_boost;
-          vy += cos(phi)*5.0*r;
-          P += 5.0 + 0.5*25.0*r*r;
-        }
-        else if (r >= 0.2 && r < 0.4) {
-          vx += -sin(phi)*(2.0-5.0*r) + v_boost;
-          vy += cos(phi)*(2.0-5.0*r);
-          P += 9.0 - 4.0*log(0.2) + 0.5*25.0*r*r - 20.0*r + 4.0*log(r);
-        }
-        else {
-          vx += 0.0;
-          vy += 0.0;
-          P += 3.0 + 4.0*log(2.0);
-        }
-      }
-      vx = vx/N;
-      vy = vy/N;
-      P = P/N;
-*/
+      r   = sqrt((x_pos - xc) * (x_pos - xc) + (y_pos - yc) * (y_pos - yc));
+      phi = atan2((y_pos - yc), (x_pos - xc));
+
+      /*
+            // set vx, vy, P to zero before integrating
+            vx = 0.0;
+            vy = 0.0;
+            P = 0.0;
+
+            // monte carlo sample to get an integrated value for vx, vy, P
+            for (int ii = 0; ii<N; ii++) {
+              // get a random dx and dy to sample within the cell
+              ran = rand() % 1000;
+              dx = H.dx*(ran/1000.0 - 0.5);
+              ran = rand() % 1000;
+              dy = H.dy*(ran/1000.0 - 0.5);
+              x = x_pos + dx;
+              y = y_pos + dy;
+              // calculate r and phi using the new x & y positions
+              r = sqrt((x-xc)*(x-xc) + (y-yc)*(y-yc));
+              phi = atan2((y-yc), (x-xc));
+              if (r < 0.2) {
+                vx += -sin(phi)*5.0*r + v_boost;
+                vy += cos(phi)*5.0*r;
+                P += 5.0 + 0.5*25.0*r*r;
+              }
+              else if (r >= 0.2 && r < 0.4) {
+                vx += -sin(phi)*(2.0-5.0*r) + v_boost;
+                vy += cos(phi)*(2.0-5.0*r);
+                P += 9.0 - 4.0*log(0.2) + 0.5*25.0*r*r - 20.0*r + 4.0*log(r);
+              }
+              else {
+                vx += 0.0;
+                vy += 0.0;
+                P += 3.0 + 4.0*log(2.0);
+              }
+            }
+            vx = vx/N;
+            vy = vy/N;
+            P = P/N;
+      */
       if (r < 0.2) {
-        vx = -sin(phi)*5.0*r + v_boost;
-        vy = cos(phi)*5.0*r;
-        P = 5.0 + 0.5*25.0*r*r;
-      }
-      else if (r >= 0.2 && r < 0.4) {
-        vx = -sin(phi)*(2.0-5.0*r) + v_boost;
-        vy = cos(phi)*(2.0-5.0*r);
-        P = 9.0 - 4.0*log(0.2) + 0.5*25.0*r*r - 20.0*r + 4.0*log(r);
-      }
-      else {
+        vx = -sin(phi) * 5.0 * r + v_boost;
+        vy = cos(phi) * 5.0 * r;
+        P  = 5.0 + 0.5 * 25.0 * r * r;
+      } else if (r >= 0.2 && r < 0.4) {
+        vx = -sin(phi) * (2.0 - 5.0 * r) + v_boost;
+        vy = cos(phi) * (2.0 - 5.0 * r);
+        P  = 9.0 - 4.0 * log(0.2) + 0.5 * 25.0 * r * r - 20.0 * r + 4.0 * log(r);
+      } else {
         vx = 0.0;
         vy = 0.0;
-        P = 3.0 + 4.0*log(2.0);
+        P  = 3.0 + 4.0 * log(2.0);
       }
       // set P constant for modified Gresho problem
-      //P = 5.5;
+      // P = 5.5;
 
       // set values of conserved variables
-      C.density[id] = d;
-      C.momentum_x[id] = d*vx;
-      C.momentum_y[id] = d*vy;
+      C.density[id]    = d;
+      C.momentum_x[id] = d * vx;
+      C.momentum_y[id] = d * vy;
       C.momentum_z[id] = 0.0;
-      C.Energy[id] = P/(gama-1.0) + 0.5*d*(vx*vx + vy*vy);
-      #ifdef DE
-      C.GasEnergy[id]  = P/(gama-1.0);
-      #endif // DE
+      C.Energy[id]     = P / (gama - 1.0) + 0.5 * d * (vx * vx + vy * vy);
+#ifdef DE
+      C.GasEnergy[id] = P / (gama - 1.0);
+#endif  // DE
 
-      //r = sqrt((x_pos-xc)*(x_pos-xc) + (y_pos-yc)*(y_pos-yc));
-      //printf("%f %f %f %f %f\n", x_pos, y_pos, r, vx, vy);
+      // r = sqrt((x_pos-xc)*(x_pos-xc) + (y_pos-yc)*(y_pos-yc));
+      // printf("%f %f %f %f %f\n", x_pos, y_pos, r, vx, vy);
     }
   }
-
-
 }
 
-
-
 /*! \fn void Implosion_2D()
  *  \brief Implosion test described in Liska, 2003. */
 void Grid3D::Implosion_2D()
@@ -951,44 +1036,41 @@ void Grid3D::Implosion_2D()
   Real x_pos, y_pos, z_pos;
   Real P;
 
-
   // set the initial values of the conserved variables
-  for (j=H.n_ghost; j<H.ny-H.n_ghost; j++) {
-    for (i=H.n_ghost; i<H.nx-H.n_ghost; i++) {
-      id = i + j*H.nx;
+  for (j = H.n_ghost; j < H.ny - H.n_ghost; j++) {
+    for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) {
+      id = i + j * H.nx;
       // get the centered x and y positions
       Get_Position(i, j, H.n_ghost, &x_pos, &y_pos, &z_pos);
 
       // inner corner of box
       if (y_pos < (0.1500001 - x_pos)) {
-        C.density[id] = 0.125;
+        C.density[id]    = 0.125;
         C.momentum_x[id] = 0.0;
         C.momentum_y[id] = 0.0;
         C.momentum_z[id] = 0.0;
-        P = 0.14;
-        C.Energy[id] = P/(gama-1.0);
-        #ifdef DE
-        C.GasEnergy[id] = P/(gama-1.0);
-        #endif
+        P                = 0.14;
+        C.Energy[id]     = P / (gama - 1.0);
+#ifdef DE
+        C.GasEnergy[id] = P / (gama - 1.0);
+#endif
       }
       // everywhere else
       else {
-        C.density[id] = 1.0;
+        C.density[id]    = 1.0;
         C.momentum_x[id] = 0.0;
         C.momentum_y[id] = 0.0;
         C.momentum_z[id] = 0.0;
-        P = 1.0;
-        C.Energy[id] = P/(gama-1.0);
-        #ifdef DE
-        C.GasEnergy[id] = P/(gama-1.0);
-        #endif
+        P                = 1.0;
+        C.Energy[id]     = P / (gama - 1.0);
+#ifdef DE
+        C.GasEnergy[id] = P / (gama - 1.0);
+#endif
       }
     }
   }
-
 }
 
-
 /*! \fn void Noh_2D()
  *  \brief Noh test described in Liska, 2003. */
 void Grid3D::Noh_2D()
@@ -999,30 +1081,27 @@ void Grid3D::Noh_2D()
 
   P = 1.0e-6;
   // set the initial values of the conserved variables
-  for (j=H.n_ghost; j<H.ny-H.n_ghost; j++) {
-    for (i=H.n_ghost; i<H.nx-H.n_ghost; i++) {
-      id = i + j*H.nx;
+  for (j = H.n_ghost; j < H.ny - H.n_ghost; j++) {
+    for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) {
+      id = i + j * H.nx;
       // get the centered x and y positions at (x,y,z)
       Get_Position(i, j, H.n_ghost, &x_pos, &y_pos, &z_pos);
 
-      C.density[id] = 1.0;
-      r = sqrt(x_pos*x_pos + y_pos*y_pos);
-      vx = x_pos / r;
-      vy = y_pos / r;
-      C.momentum_x[id] = - x_pos / r;
-      C.momentum_y[id] = - y_pos / r;
+      C.density[id]    = 1.0;
+      r                = sqrt(x_pos * x_pos + y_pos * y_pos);
+      vx               = x_pos / r;
+      vy               = y_pos / r;
+      C.momentum_x[id] = -x_pos / r;
+      C.momentum_y[id] = -y_pos / r;
       C.momentum_z[id] = 0.0;
-      C.Energy[id] = P/(gama-1.0) + 0.5;
-      #ifdef DE
-      C.GasEnergy[id]  = P/(gama-1.0);
-      #endif // DE
+      C.Energy[id]     = P / (gama - 1.0) + 0.5;
+#ifdef DE
+      C.GasEnergy[id] = P / (gama - 1.0);
+#endif  // DE
     }
   }
-
 }
 
-
-
 /*! \fn void Noh_3D()
  *  \brief Noh test described in Stone, 2008. */
 void Grid3D::Noh_3D()
@@ -1030,36 +1109,31 @@ void Grid3D::Noh_3D()
   int i, j, k, id;
   Real x_pos, y_pos, z_pos, r;
 
-  Real P=1.0e-6;
+  Real P = 1.0e-6;
 
   // set the initial values of the conserved variables
-  for (k=H.n_ghost; k<H.nz-H.n_ghost; k++) {
-    for (j=H.n_ghost; j<H.ny-H.n_ghost; j++) {
-      for (i=H.n_ghost; i<H.nx-H.n_ghost; i++) {
-        id = i + j*H.nx + k*H.nx*H.ny;
+  for (k = H.n_ghost; k < H.nz - H.n_ghost; k++) {
+    for (j = H.n_ghost; j < H.ny - H.n_ghost; j++) {
+      for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) {
+        id = i + j * H.nx + k * H.nx * H.ny;
 
         // get the centered cell positions at (i,j,k)
         Get_Position(i, j, k, &x_pos, &y_pos, &z_pos);
 
-        C.density[id] = 1.0;
-        r = sqrt(x_pos*x_pos + y_pos*y_pos + z_pos*z_pos);
-        C.momentum_x[id] = - x_pos / r;
-        C.momentum_y[id] = - y_pos / r;
-        C.momentum_z[id] = - z_pos / r;
-        C.Energy[id] = P/(gama-1.0) + 0.5;
-        #ifdef DE
-        C.GasEnergy[id]  = P/(gama-1.0);
-        #endif //DE
+        C.density[id]    = 1.0;
+        r                = sqrt(x_pos * x_pos + y_pos * y_pos + z_pos * z_pos);
+        C.momentum_x[id] = -x_pos / r;
+        C.momentum_y[id] = -y_pos / r;
+        C.momentum_z[id] = -z_pos / r;
+        C.Energy[id]     = P / (gama - 1.0) + 0.5;
+#ifdef DE
+        C.GasEnergy[id] = P / (gama - 1.0);
+#endif  // DE
       }
     }
   }
-
-
-
 }
 
-
-
 /*! \fn void Disk_2D()
  *  \brief Initialize the grid with a 2D disk following a Kuzmin profile. */
 void Grid3D::Disk_2D()
@@ -1069,107 +1143,106 @@ void Grid3D::Disk_2D()
   Real d, n, a, a_d, a_h, v, vx, vy, P, T_d, x;
   Real M_vir, M_h, M_d, c_vir, R_vir, R_h, R_d, Sigma;
 
-  M_vir = 1.0e12; // viral mass of MW in M_sun
-  M_d = 6.5e10; // mass of disk in M_sun
-  M_h = M_vir - M_d; // halo mass in M_sun
-  R_vir = 261; // viral radius in kpc
-  c_vir = 20; // halo concentration
-  R_h = R_vir / c_vir; // halo scale length in kpc
-  R_d = 3.5; // disk scale length in kpc
-  T_d = 10000; // disk temperature, 10^4K
-
+  M_vir = 1.0e12;         // viral mass of MW in M_sun
+  M_d   = 6.5e10;         // mass of disk in M_sun
+  M_h   = M_vir - M_d;    // halo mass in M_sun
+  R_vir = 261;            // viral radius in kpc
+  c_vir = 20;             // halo concentration
+  R_h   = R_vir / c_vir;  // halo scale length in kpc
+  R_d   = 3.5;            // disk scale length in kpc
+  T_d   = 10000;          // disk temperature, 10^4K
 
   // set the initial values of the conserved variables
-  for (j=H.n_ghost; j<H.ny-H.n_ghost; j++) {
-    for (i=H.n_ghost; i<H.nx-H.n_ghost; i++) {
-      id = i + j*H.nx;
+  for (j = H.n_ghost; j < H.ny - H.n_ghost; j++) {
+    for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) {
+      id = i + j * H.nx;
       // get the centered x and y positions
       Get_Position(i, j, H.n_ghost, &x_pos, &y_pos, &z_pos);
 
       // calculate centered radial position and phi
-      r = sqrt(x_pos*x_pos + y_pos*y_pos);
+      r   = sqrt(x_pos * x_pos + y_pos * y_pos);
       phi = atan2(y_pos, x_pos);
 
       // Disk surface density [M_sun / kpc^2]
       // Assume gas surface density is exponential with scale length 2*R_d and
       // mass 0.25*M_d
-      Sigma = 0.25*M_d * exp(-r/(2*R_d)) / (8*PI*R_d*R_d) ;
-      d = Sigma; // just use sigma for mass density since height is arbitrary
-      n = d * DENSITY_UNIT / MP; // number density, cgs
-      P = n*KB*T_d / PRESSURE_UNIT; // disk pressure, code units
+      Sigma = 0.25 * M_d * exp(-r / (2 * R_d)) / (8 * M_PI * R_d * R_d);
+      d     = Sigma;                         // just use sigma for mass density since height is arbitrary
+      n     = d * DENSITY_UNIT / MP;         // number density, cgs
+      P     = n * KB * T_d / PRESSURE_UNIT;  // disk pressure, code units
 
       // radial acceleration due to Kuzmin disk + NFW halo
-      x = r / R_h;
-      a_d = GN * M_d * r * pow(r*r + R_d*R_d, -1.5);
-      a_h = GN * M_h * (log(1+x)- x / (1+x)) / ((log(1+c_vir) - c_vir / (1+c_vir)) * r*r);
-      a = a_d + a_h;
+      x   = r / R_h;
+      a_d = GN * M_d * r * pow(r * r + R_d * R_d, -1.5);
+      a_h = GN * M_h * (log(1 + x) - x / (1 + x)) / ((log(1 + c_vir) - c_vir / (1 + c_vir)) * r * r);
+      a   = a_d + a_h;
 
       // circular velocity
-      v = sqrt(r*a);
-      vx = -sin(phi)*v;
-      vy = cos(phi)*v;
+      v  = sqrt(r * a);
+      vx = -sin(phi) * v;
+      vy = cos(phi) * v;
 
       // set values of conserved variables
-      C.density[id] = d;
-      C.momentum_x[id] = d*vx;
-      C.momentum_y[id] = d*vy;
+      C.density[id]    = d;
+      C.momentum_x[id] = d * vx;
+      C.momentum_y[id] = d * vy;
       C.momentum_z[id] = 0.0;
-      C.Energy[id] = P/(gama-1.0) + 0.5*d*(vx*vx + vy*vy);
+      C.Energy[id]     = P / (gama - 1.0) + 0.5 * d * (vx * vx + vy * vy);
 
-      #ifdef DE
-      C.GasEnergy[id]  = P/(gama-1.0);
-      #endif //DE
-      //printf("%e %e %f %f %f %f %f\n", x_pos, y_pos, d, Sigma, vx, vy, P);
+#ifdef DE
+      C.GasEnergy[id] = P / (gama - 1.0);
+#endif  // DE
+        // printf("%e %e %f %f %f %f %f\n", x_pos, y_pos, d, Sigma, vx, vy, P);
     }
   }
-
-
 }
 
 /*! \fn void Spherical_Overpressure_3D()
- *  \brief Spherical overdensity and overpressure causing an spherical explosion */
+ *  \brief Spherical overdensity and overpressure causing an spherical explosion
+ */
 void Grid3D::Spherical_Overpressure_3D()
 {
   int i, j, k, id;
   Real x_pos, y_pos, z_pos, r, center_x, center_y, center_z;
   Real density, pressure, overDensity, overPressure, energy;
   Real vx, vy, vz, v2;
-  center_x = 0.5;
-  center_y = 0.5;
-  center_z = 0.5;
-  overDensity = 1;
+  center_x     = 0.5;
+  center_y     = 0.5;
+  center_z     = 0.5;
+  overDensity  = 1;
   overPressure = 10;
-  vx = 0;
-  vy = 0;
-  vz = 0;
+  vx           = 0;
+  vy           = 0;
+  vz           = 0;
 
   // set the initial values of the conserved variables
-  for (k=H.n_ghost; k<H.nz-H.n_ghost; k++) {
-    for (j=H.n_ghost; j<H.ny-H.n_ghost; j++) {
-      for (i=H.n_ghost; i<H.nx-H.n_ghost; i++) {
-        id = i + j*H.nx + k*H.nx*H.ny;
+  for (k = H.n_ghost; k < H.nz - H.n_ghost; k++) {
+    for (j = H.n_ghost; j < H.ny - H.n_ghost; j++) {
+      for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) {
+        id = i + j * H.nx + k * H.nx * H.ny;
 
         // // get the centered cell positions at (i,j,k)
         Get_Position(i, j, k, &x_pos, &y_pos, &z_pos);
-        density = 0.1;
+        density  = 0.1;
         pressure = 1;
 
-        r = sqrt( (x_pos-center_x)*(x_pos-center_x) + (y_pos-center_y)*(y_pos-center_y) + (z_pos-center_z)*(z_pos-center_z) );
-        if ( r < 0.2 ){
+        r = sqrt((x_pos - center_x) * (x_pos - center_x) + (y_pos - center_y) * (y_pos - center_y) +
+                 (z_pos - center_z) * (z_pos - center_z));
+        if (r < 0.2) {
           density = overDensity;
           pressure += overPressure;
         }
-        v2 = vx*vx + vy*vy + vz*vz;
-        energy = pressure/(gama-1) + 0.5*density*v2;
-        C.density[id] = density;
-        C.momentum_x[id] = density*vx;
-        C.momentum_y[id] = density*vy;
-        C.momentum_z[id] = density*vz;
-        C.Energy[id] = energy;
-
-        #ifdef DE
-        C.GasEnergy[id] = pressure/(gama-1);
-        #endif
+        v2               = vx * vx + vy * vy + vz * vz;
+        energy           = pressure / (gama - 1) + 0.5 * density * v2;
+        C.density[id]    = density;
+        C.momentum_x[id] = density * vx;
+        C.momentum_y[id] = density * vy;
+        C.momentum_z[id] = density * vz;
+        C.Energy[id]     = energy;
+
+#ifdef DE
+        C.GasEnergy[id] = pressure / (gama - 1);
+#endif
       }
     }
   }
@@ -1179,59 +1252,61 @@ void Grid3D::Spherical_Overpressure_3D()
  *  \brief Spherical overdensity for gravitational colapse */
 void Grid3D::Spherical_Overdensity_3D()
 {
- int i, j, k, id;
- Real x_pos, y_pos, z_pos, r, center_x, center_y, center_z;
- Real density, pressure, overDensity, overPressure, energy, radius, background_density;
- Real vx, vy, vz, v2;
- center_x = 0.5;
- center_y = 0.5;
- center_z = 0.5;
- overDensity = 1;
- overPressure = 0;
- vx = 0;
- vy = 0;
- vz = 0;
- radius = 0.2;
- background_density = 0.0005;
- H.sphere_density = overDensity;
- H.sphere_radius = radius;
- H.sphere_background_density = background_density;
- H.sphere_center_x = center_x;
- H.sphere_center_y = center_y;
- H.sphere_center_z = center_z;
-
- // set the initial values of the conserved variables
- for (k=H.n_ghost; k<H.nz-H.n_ghost; k++) {
-   for (j=H.n_ghost; j<H.ny-H.n_ghost; j++) {
-     for (i=H.n_ghost; i<H.nx-H.n_ghost; i++) {
-       id = i + j*H.nx + k*H.nx*H.ny;
-
-       // // get the centered cell positions at (i,j,k)
-       Get_Position(i, j, k, &x_pos, &y_pos, &z_pos);
-       density = background_density;
-       pressure = 0.0005;
-
-       r = sqrt( (x_pos-center_x)*(x_pos-center_x) + (y_pos-center_y)*(y_pos-center_y) + (z_pos-center_z)*(z_pos-center_z) );
-       if ( r < radius ){
-         density = overDensity;
-         pressure += overPressure;
-       }
-       v2 = vx*vx + vy*vy + vz*vz;
-       energy = pressure/(gama-1) + 0.5*density*v2;
-       C.density[id] = density;
-       C.momentum_x[id] = density*vx;
-       C.momentum_y[id] = density*vy;
-       C.momentum_z[id] = density*vz;
-       C.Energy[id] = energy;
-
-       #ifdef DE
-       C.GasEnergy[id] = pressure/(gama-1);
-       #endif
-     }
-   }
- }
-}
+  int i, j, k, id;
+  Real x_pos, y_pos, z_pos, r, center_x, center_y, center_z;
+  Real density, pressure, overDensity, overPressure, energy, radius, background_density;
+  Real vx, vy, vz, v2;
+  center_x = 0.5;
+  center_y = 0.5;
+  center_z = 0.5;
+  // overDensity = 1000 * mu * MP / DENSITY_UNIT; // 100 particles per cm^3
+  overDensity  = 1;
+  overPressure = 0;
+  vx           = 0;
+  vy           = 0;
+  vz           = 0;
+  radius       = 0.2;
+  // background_density = mu * MP / DENSITY_UNIT; // 1 particles per cm^3
+  background_density          = 0.0005;
+  H.sphere_density            = overDensity;
+  H.sphere_radius             = radius;
+  H.sphere_background_density = background_density;
+  H.sphere_center_x           = center_x;
+  H.sphere_center_y           = center_y;
+  H.sphere_center_z           = center_z;
+
+  // set the initial values of the conserved variables
+  for (k = H.n_ghost; k < H.nz - H.n_ghost; k++) {
+    for (j = H.n_ghost; j < H.ny - H.n_ghost; j++) {
+      for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) {
+        id = i + j * H.nx + k * H.nx * H.ny;
 
+        // // get the centered cell positions at (i,j,k)
+        Get_Position(i, j, k, &x_pos, &y_pos, &z_pos);
+        density  = background_density;
+        pressure = 0.0005;
+
+        r = sqrt((x_pos - center_x) * (x_pos - center_x) + (y_pos - center_y) * (y_pos - center_y) +
+                 (z_pos - center_z) * (z_pos - center_z));
+        if (r < radius) {
+          density = overDensity;
+          pressure += overPressure;
+        }
+        v2               = vx * vx + vy * vy + vz * vz;
+        energy           = pressure / (gama - 1) + 0.5 * density * v2;
+        C.density[id]    = density;
+        C.momentum_x[id] = density * vx;
+        C.momentum_y[id] = density * vy;
+        C.momentum_z[id] = density * vz;
+        C.Energy[id]     = energy;
+
+#ifdef DE
+        C.GasEnergy[id] = pressure / (gama - 1);
+#endif
+      }
+    }
+  }
+}
 
 /*! \fn void Clouds()
  *  \brief Bunch of clouds. */
@@ -1240,309 +1315,295 @@ void Grid3D::Clouds()
   int i, j, k, id;
   int istart, jstart, kstart, iend, jend, kend;
   Real x_pos, y_pos, z_pos;
-  Real n_bg, n_cl; // background and cloud number density
-  Real rho_bg, rho_cl; // background and cloud density
-  Real vx_bg, vx_cl; // background and cloud velocity
+  Real n_bg, n_cl;      // background and cloud number density
+  Real rho_bg, rho_cl;  // background and cloud density
+  Real vx_bg, vx_cl;    // background and cloud velocity
   Real vy_bg, vy_cl;
   Real vz_bg, vz_cl;
-  Real T_bg, T_cl; // background and cloud temperature
-  Real p_bg, p_cl; // background and cloud pressure
-  Real mu = 0.6; // mean atomic weight
-  int N_cl = 1; // number of clouds
-  Real R_cl = 2.5; // cloud radius in code units (kpc)
-  Real cl_pos[N_cl][3]; // array of cloud positions
+  Real T_bg, T_cl;       // background and cloud temperature
+  Real p_bg, p_cl;       // background and cloud pressure
+  Real mu   = 0.6;       // mean atomic weight
+  int N_cl  = 1;         // number of clouds
+  Real R_cl = 2.5;       // cloud radius in code units (kpc)
+  Real cl_pos[N_cl][3];  // array of cloud positions
   Real r;
 
   // Multiple Cloud Setup
-  //for (int nn=0; nn<N_cl; nn++) {
+  // for (int nn=0; nn<N_cl; nn++) {
   //  cl_pos[nn][0] = (nn+1)*0.1*H.xdglobal+0.5*H.xdglobal;
   //  cl_pos[nn][1] = (nn%2*0.1+0.45)*H.ydglobal;
   //  cl_pos[nn][2] = 0.5*H.zdglobal;
-  //  printf("Cloud positions: %f %f %f\n", cl_pos[nn][0], cl_pos[nn][1], cl_pos[nn][2]);
+  //  printf("Cloud positions: %f %f %f\n", cl_pos[nn][0], cl_pos[nn][1],
+  //  cl_pos[nn][2]);
   //}
 
   // single centered cloud setup
-  for (int nn=0; nn<N_cl; nn++) {
-    cl_pos[nn][0] = 0.5*H.xdglobal;
-    cl_pos[nn][1] = 0.5*H.ydglobal;
-    cl_pos[nn][2] = 0.5*H.zdglobal;
+  for (int nn = 0; nn < N_cl; nn++) {
+    cl_pos[nn][0] = 0.5 * H.xdglobal;
+    cl_pos[nn][1] = 0.5 * H.ydglobal;
+    cl_pos[nn][2] = 0.5 * H.zdglobal;
     printf("Cloud positions: %f %f %f\n", cl_pos[nn][0], cl_pos[nn][1], cl_pos[nn][2]);
   }
 
-  n_bg = 1.68e-4;
-  n_cl  = 5.4e-2;
-  rho_bg = n_bg*mu*MP/DENSITY_UNIT;
-  rho_cl  = n_cl*mu*MP/DENSITY_UNIT;
-  vx_bg = 0.0;
-  //vx_c  = -200*TIME_UNIT/KPC; // convert from km/s to kpc/kyr
-  vx_cl  = 0.0;
+  n_bg   = 1.68e-4;
+  n_cl   = 5.4e-2;
+  rho_bg = n_bg * mu * MP / DENSITY_UNIT;
+  rho_cl = n_cl * mu * MP / DENSITY_UNIT;
+  vx_bg  = 0.0;
+  // vx_c  = -200*TIME_UNIT/KPC; // convert from km/s to kpc/kyr
+  vx_cl = 0.0;
   vy_bg = vy_cl = 0.0;
   vz_bg = vz_cl = 0.0;
-  T_bg = 3e6;
-  T_cl = 1e4;
-  p_bg = n_bg*KB*T_bg / PRESSURE_UNIT;
-  p_cl = p_bg;
+  T_bg          = 3e6;
+  T_cl          = 1e4;
+  p_bg          = n_bg * KB * T_bg / PRESSURE_UNIT;
+  p_cl          = p_bg;
 
   istart = H.n_ghost;
-  iend   = H.nx-H.n_ghost;
+  iend   = H.nx - H.n_ghost;
   if (H.ny > 1) {
     jstart = H.n_ghost;
-    jend   = H.ny-H.n_ghost;
-  }
-  else {
+    jend   = H.ny - H.n_ghost;
+  } else {
     jstart = 0;
     jend   = H.ny;
   }
   if (H.nz > 1) {
     kstart = H.n_ghost;
-    kend   = H.nz-H.n_ghost;
-  }
-  else {
+    kend   = H.nz - H.n_ghost;
+  } else {
     kstart = 0;
     kend   = H.nz;
   }
 
   // set initial values of conserved variables
-  for(k=kstart; k<kend; k++) {
-    for(j=jstart; j<jend; j++) {
-      for(i=istart; i<iend; i++) {
-	
-        //get cell index
-        id = i + j*H.nx + k*H.nx*H.ny;
+  for (k = kstart; k < kend; k++) {
+    for (j = jstart; j < jend; j++) {
+      for (i = istart; i < iend; i++) {
+        // get cell index
+        id = i + j * H.nx + k * H.nx * H.ny;
 
         // get cell-centered position
         Get_Position(i, j, k, &x_pos, &y_pos, &z_pos);
 
         // set background state
         C.density[id]    = rho_bg;
-        C.momentum_x[id] = rho_bg*vx_bg;
-        C.momentum_y[id] = rho_bg*vy_bg;
-        C.momentum_z[id] = rho_bg*vz_bg;
-        C.Energy[id]     = p_bg/(gama-1.0) + 0.5*rho_bg*(vx_bg*vx_bg + vy_bg*vy_bg + vz_bg*vz_bg);
-        #ifdef DE
-        C.GasEnergy[id]  = p_bg/(gama-1.0);
-        #endif
-        #ifdef SCALAR
-        C.scalar[id] = C.density[id]*0.0;
-        #endif
-        // add clouds 
-        for (int nn = 0; nn<N_cl; nn++) {
-          r = sqrt((x_pos - cl_pos[nn][0])*(x_pos - cl_pos[nn][0]) + (y_pos - cl_pos[nn][1])*(y_pos - cl_pos[nn][1]) + (z_pos - cl_pos[nn][2])*(z_pos - cl_pos[nn][2]));
+        C.momentum_x[id] = rho_bg * vx_bg;
+        C.momentum_y[id] = rho_bg * vy_bg;
+        C.momentum_z[id] = rho_bg * vz_bg;
+        C.Energy[id]     = p_bg / (gama - 1.0) + 0.5 * rho_bg * (vx_bg * vx_bg + vy_bg * vy_bg + vz_bg * vz_bg);
+#ifdef DE
+        C.GasEnergy[id] = p_bg / (gama - 1.0);
+#endif
+#ifdef SCALAR
+  #ifdef DUST
+        C.host[id + H.n_cells * grid_enum::dust_density] = 0.0;
+  #endif
+#endif
+        // add clouds
+        for (int nn = 0; nn < N_cl; nn++) {
+          r = sqrt((x_pos - cl_pos[nn][0]) * (x_pos - cl_pos[nn][0]) +
+                   (y_pos - cl_pos[nn][1]) * (y_pos - cl_pos[nn][1]) +
+                   (z_pos - cl_pos[nn][2]) * (z_pos - cl_pos[nn][2]));
           if (r < R_cl) {
             C.density[id]    = rho_cl;
-            C.momentum_x[id] = rho_cl*vx_cl;
-            C.momentum_y[id] = rho_cl*vy_cl;
-            C.momentum_z[id] = rho_cl*vz_cl;
-            C.Energy[id]     = p_cl/(gama-1.0) + 0.5*rho_cl*(vx_cl*vx_cl + vy_cl*vy_cl + vz_cl*vz_cl);
-            #ifdef DE
-            C.GasEnergy[id]  = p_cl/(gama-1.0);
-            #endif
-            #ifdef SCALAR
-            C.scalar[id] = C.density[id]*0.3;
-            #endif
+            C.momentum_x[id] = rho_cl * vx_cl;
+            C.momentum_y[id] = rho_cl * vy_cl;
+            C.momentum_z[id] = rho_cl * vz_cl;
+            C.Energy[id]     = p_cl / (gama - 1.0) + 0.5 * rho_cl * (vx_cl * vx_cl + vy_cl * vy_cl + vz_cl * vz_cl);
+#ifdef DE
+            C.GasEnergy[id] = p_cl / (gama - 1.0);
+#endif  // DE
+#ifdef SCALAR
+  #ifdef DUST
+            C.host[id + H.n_cells * grid_enum::dust_density] = rho_cl * 1e-2;
+  #endif  // DUST
+#endif    // SCALAR
           }
         }
       }
     }
   }
-
 }
 
 void Grid3D::Uniform_Grid()
 {
-  chprintf( " Initializing Uniform Grid\n");
+  chprintf(" Initializing Uniform Grid\n");
   int i, j, k, id;
 
   // Set limits
   size_t const istart = H.n_ghost;
-  size_t const iend   = H.nx-H.n_ghost;
+  size_t const iend   = H.nx - H.n_ghost;
   size_t const jstart = H.n_ghost;
-  size_t const jend   = H.ny-H.n_ghost;
+  size_t const jend   = H.ny - H.n_ghost;
   size_t const kstart = H.n_ghost;
-  size_t const kend   = H.nz-H.n_ghost;
+  size_t const kend   = H.nz - H.n_ghost;
 
   // set the initial values of the conserved variables
-  for (k=kstart-1; k<kend; k++) {
-    for (j=jstart-1; j<jend; j++) {
-      for (i=istart-1; i<iend; i++) {
-
-        id = i + j*H.nx + k*H.nx*H.ny;
+  for (k = kstart - 1; k < kend; k++) {
+    for (j = jstart - 1; j < jend; j++) {
+      for (i = istart - 1; i < iend; i++) {
+        id = i + j * H.nx + k * H.nx * H.ny;
 
-        #ifdef  MHD
-          // Set the magnetic field including the rightmost ghost cell on the
-          // left side which is really the left face of the first grid cell
-          C.magnetic_x[id] = 0;
-          C.magnetic_y[id] = 0;
-          C.magnetic_z[id] = 0;
-        #endif  // MHD
+#ifdef MHD
+        // Set the magnetic field including the rightmost ghost cell on the
+        // left side which is really the left face of the first grid cell
+        C.magnetic_x[id] = 0;
+        C.magnetic_y[id] = 0;
+        C.magnetic_z[id] = 0;
+#endif  // MHD
 
         // Exclude the rightmost ghost cell on the "left" side
-        if ((k >= kstart) and (j >= jstart) and (i >= istart))
-        {
-          C.density[id] = 0;
+        if ((k >= kstart) and (j >= jstart) and (i >= istart)) {
+          C.density[id]    = 0;
           C.momentum_x[id] = 0;
           C.momentum_y[id] = 0;
           C.momentum_z[id] = 0;
-          C.Energy[id] = 0;
+          C.Energy[id]     = 0;
 
-          #ifdef DE
+#ifdef DE
           C.GasEnergy[id] = 0;
-          #endif
+#endif
         }
       }
     }
   }
 }
 
-void Grid3D::Zeldovich_Pancake( struct parameters P ){
-
-  #ifndef COSMOLOGY
-  chprintf( "To run a Zeldovich Pancake COSMOLOGY has to be turned ON \n" );
+void Grid3D::Zeldovich_Pancake(struct Parameters P)
+{
+#ifndef COSMOLOGY
+  chprintf("To run a Zeldovich Pancake COSMOLOGY has to be turned ON \n");
   exit(-1);
-  #else
-
+#else
 
   int i, j, k, id;
   Real x_pos, y_pos, z_pos;
   Real H0, h, Omega_M, rho_0, G, z_zeldovich, z_init, x_center, T_init, k_x;
 
   chprintf("Setting Zeldovich Pancake initial conditions...\n");
-  H0 = P.H0;
-  h = H0 / 100;
+  H0      = P.H0;
+  h       = H0 / 100;
   Omega_M = P.Omega_M;
 
-  chprintf( " h = %f \n", h );
-  chprintf( " Omega_M = %f \n", Omega_M );
+  chprintf(" h = %f \n", h);
+  chprintf(" Omega_M = %f \n", Omega_M);
 
-  H0 /= 1000;               //[km/s / kpc]
-  G = G_COSMO;
-  rho_0 = 3*H0*H0 / ( 8*M_PI*G ) * Omega_M /h / h;
+  H0 /= 1000;  //[km/s / kpc]
+  G           = G_COSMO;
+  rho_0       = 3 * H0 * H0 / (8 * M_PI * G) * Omega_M / h / h;
   z_zeldovich = 1;
-  z_init = P.Init_redshift;
-  chprintf( " rho_0 = %f \n", rho_0 );
-  chprintf( " z_init = %f \n", z_init );
-  chprintf( " z_zeldovich = %f \n", z_zeldovich );
+  z_init      = P.Init_redshift;
+  chprintf(" rho_0 = %f \n", rho_0);
+  chprintf(" z_init = %f \n", z_init);
+  chprintf(" z_zeldovich = %f \n", z_zeldovich);
 
   x_center = H.xdglobal / 2;
-  chprintf( " Peak Center = %f \n", x_center );
+  chprintf(" Peak Center = %f \n", x_center);
 
   T_init = 100;
-  chprintf( " T initial = %f \n", T_init );
-
-  k_x = 2 * M_PI /  H.xdglobal;
+  chprintf(" T initial = %f \n", T_init);
 
+  k_x = 2 * M_PI / H.xdglobal;
 
   char filename[100];
   // create the filename to read from
   strcpy(filename, P.indir);
   strcat(filename, "ics_zeldovich.dat");
-  chprintf( " Loading ICs File: %s\n", filename);
+  chprintf(" Loading ICs File: %s\n", filename);
 
   real_vector_t ics_values;
 
-  ifstream file_in( filename );
-  string line;
+  std::ifstream file_in(filename);
+  std::string line;
   Real ic_val;
-  if (file_in.is_open()){
-    while ( getline (file_in, line) ){
-      ic_val = atof( line.c_str() );
-      ics_values.push_back( ic_val );
+  if (file_in.is_open()) {
+    while (getline(file_in, line)) {
+      ic_val = atof(line.c_str());
+      ics_values.push_back(ic_val);
       // chprintf("%f\n", ic_val);
     }
     file_in.close();
-  }
-  else{
+  } else {
     chprintf("  Error: Unable to open ics zeldovich file\n");
     exit(1);
   }
   int nPoints = 256;
 
-
-
   Real dens, vel, temp, U, E, gamma;
   gamma = P.gamma;
 
   int index;
   // set the initial values of the conserved variables
-  for (k=H.n_ghost; k<H.nz-H.n_ghost; k++) {
-    for (j=H.n_ghost; j<H.ny-H.n_ghost; j++) {
-      for (i=H.n_ghost; i<H.nx-H.n_ghost; i++) {
-        id = i + j*H.nx + k*H.nx*H.ny;
+  for (k = H.n_ghost; k < H.nz - H.n_ghost; k++) {
+    for (j = H.n_ghost; j < H.ny - H.n_ghost; j++) {
+      for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) {
+        id = i + j * H.nx + k * H.nx * H.ny;
 
         // // get the centered cell positions at (i,j,k)
         Get_Position(i, j, k, &x_pos, &y_pos, &z_pos);
 
-        //Analytical Initial Conditions
-        // dens = rho_0 / ( 1 - ( 1 + z_zeldovich ) / ( 1 + z_init ) * cos( k_x*( x_pos - x_center )) );
-        // vel = - H0 * ( 1 + z_zeldovich ) / sqrt( 1 + z_init ) * sin( k_x*( x_pos - x_center )) / k_x;
-        // temp = T_init * pow( dens / rho_0, 2./3 );
-        // U = temp / (gamma - 1) / MP * KB * 1e-10 * dens;
-        // E = 0.5 * dens * vel * vel + U;
-
+        // Analytical Initial Conditions
+        //  dens = rho_0 / ( 1 - ( 1 + z_zeldovich ) / ( 1 + z_init ) * cos(
+        //  k_x*( x_pos - x_center )) ); vel = - H0 * ( 1 + z_zeldovich ) /
+        //  sqrt( 1 + z_init ) * sin( k_x*( x_pos - x_center )) / k_x; temp =
+        //  T_init * pow( dens / rho_0, 2./3 ); U = temp / (gamma - 1) / MP * KB
+        //  * 1e-10 * dens; E = 0.5 * dens * vel * vel + U;
 
-        index = (int( x_pos / H.dx ) + 0 ) %256;
+        index = (int(x_pos / H.dx) + 0) % 256;
         // index = ( index + 16 ) % 256;
-        dens = ics_values[ 0*nPoints + index];
-        vel = ics_values[ 1*nPoints + index];
-        E = ics_values[ 2*nPoints + index];
-        U = ics_values[ 3*nPoints + index];
+        dens = ics_values[0 * nPoints + index];
+        vel  = ics_values[1 * nPoints + index];
+        E    = ics_values[2 * nPoints + index];
+        U    = ics_values[3 * nPoints + index];
         // //
 
         // chprintf( "%f \n", vel );
-        C.density[id] = dens;
+        C.density[id]    = dens;
         C.momentum_x[id] = dens * vel;
         C.momentum_y[id] = 0;
         C.momentum_z[id] = 0;
-        C.Energy[id] = E;
-
-        #ifdef DE
-        C.GasEnergy[id] = U ;
-        #endif
+        C.Energy[id]     = E;
 
+  #ifdef DE
+        C.GasEnergy[id] = U;
+  #endif
       }
     }
   }
 
-  #endif //COSMOLOGY
-
+#endif  // COSMOLOGY
 }
 
-
-
-void Grid3D::Chemistry_Test( struct parameters P )
+void Grid3D::Chemistry_Test(struct Parameters P)
 {
-  chprintf( "Initializing Chemistry Test...\n");
+  chprintf("Initializing Chemistry Test...\n");
 
+#ifdef COSMOLOGY
+  Real H0, Omega_M, Omega_L, Omega_b, current_z, rho_gas_mean, kpc_cgs, G, z, h, mu, T0, U, rho_gas;
+  Real HI_frac, HII_frac, HeI_frac, HeII_frac, HeIII_frac, e_frac, metal_frac, _min;
 
-  #ifdef COSMOLOGY
-  Real H0, Omega_M, Omega_L, Omega_b, current_z, rho_gas_mean,  kpc_cgs, G, z, h, mu, T0, U,rho_gas;
-  Real HI_frac, HII_frac, HeI_frac, HeII_frac, HeIII_frac, e_frac, metal_frac,_min;
-
-  H0 = P.H0;
+  H0      = P.H0;
   Omega_M = P.Omega_M;
   Omega_L = P.Omega_L;
   Omega_b = P.Omega_b;
-  z = P.Init_redshift;
+  z       = P.Init_redshift;
   kpc_cgs = KPC_CGS;
-  G = G_COSMO;
-  h = H0/100;
-  T0 = 230.0;
+  G       = G_COSMO;
+  h       = H0 / 100;
+  T0      = 230.0;
 
   // M_sun = MSUN_CGS;
-  rho_gas_mean = 3*pow(H0*1e-3, 2)/(8*M_PI*G) * Omega_b / pow(h, 2)  ;
-  chprintf( " z = %f \n", z );
-  chprintf( " HO = %f \n", H0 );
-  chprintf( " Omega_L = %f \n", Omega_L );
-  chprintf( " Omega_M = %f \n", Omega_M );
-  chprintf( " Omega_b = %f \n", Omega_b );
-  chprintf( " rho_gas_mean = %f h^2 Msun kpc^-3\n", rho_gas_mean );
-  chprintf( " T0 = %f k\n", T0 );
-  rho_gas = rho_gas_mean * pow(h, 2) / pow( kpc_cgs, 3) * MSUN_CGS;
-  chprintf( " rho_gas = %e g/cm^3\n", rho_gas );
-
-
-
-
+  rho_gas_mean = 3 * pow(H0 * 1e-3, 2) / (8 * M_PI * G) * Omega_b / pow(h, 2);
+  chprintf(" z = %f \n", z);
+  chprintf(" HO = %f \n", H0);
+  chprintf(" Omega_L = %f \n", Omega_L);
+  chprintf(" Omega_M = %f \n", Omega_M);
+  chprintf(" Omega_b = %f \n", Omega_b);
+  chprintf(" rho_gas_mean = %f h^2 Msun kpc^-3\n", rho_gas_mean);
+  chprintf(" T0 = %f k\n", T0);
+  rho_gas = rho_gas_mean * pow(h, 2) / pow(kpc_cgs, 3) * MSUN_CGS;
+  chprintf(" rho_gas = %e g/cm^3\n", rho_gas);
 
   // frac_min = 1e-10;
   // HI_frac = INITIAL_FRACTION_HI;
@@ -1552,75 +1613,372 @@ void Grid3D::Chemistry_Test( struct parameters P )
   // HeIII_frac = frac_min;
   // e_frac = HII_frac + HeII_frac + 2*HeIII_frac;
   //
-  HI_frac = INITIAL_FRACTION_HI;
-  HII_frac = INITIAL_FRACTION_HII;
-  HeI_frac = INITIAL_FRACTION_HEI;
-  HeII_frac = INITIAL_FRACTION_HEII;
+  HI_frac    = INITIAL_FRACTION_HI;
+  HII_frac   = INITIAL_FRACTION_HII;
+  HeI_frac   = INITIAL_FRACTION_HEI;
+  HeII_frac  = INITIAL_FRACTION_HEII;
   HeIII_frac = INITIAL_FRACTION_HEIII;
-  e_frac = INITIAL_FRACTION_ELECTRON;
+  e_frac     = INITIAL_FRACTION_ELECTRON;
   metal_frac = INITIAL_FRACTION_METAL;
 
+  mu = (HI_frac + HII_frac + HeI_frac + HeII_frac + HeIII_frac) /
+       (HI_frac + HII_frac + (HeI_frac + HeII_frac + HeIII_frac) / 4 + e_frac);
+  U = rho_gas_mean * T0 / (gama - 1) / MP / mu * KB * 1e-10;
+  chprintf(" mu = %f \n", mu);
+  chprintf(" U0 = %f \n", U);
 
-  mu = ( HI_frac + HII_frac + HeI_frac + HeII_frac + HeIII_frac ) / ( HI_frac + HII_frac + (HeI_frac + HeII_frac + HeIII_frac)/4 + e_frac );
-  U = rho_gas_mean *  T0 / (gama - 1) / MP / mu * KB * 1e-10;
-  chprintf( " mu = %f \n", mu);
-  chprintf( " U0 = %f \n", U );
-
-  chprintf( " HI_0 = %f \n", rho_gas_mean * HI_frac );
-
+  chprintf(" HI_0 = %f \n", rho_gas_mean * HI_frac);
 
   int i, j, k, id;
   // set the initial values of the conserved variables
-  for (k=H.n_ghost; k<H.nz-H.n_ghost; k++) {
-    for (j=H.n_ghost; j<H.ny-H.n_ghost; j++) {
-      for (i=H.n_ghost; i<H.nx-H.n_ghost; i++) {
-        id = i + j*H.nx + k*H.nx*H.ny;
+  for (k = H.n_ghost; k < H.nz - H.n_ghost; k++) {
+    for (j = H.n_ghost; j < H.ny - H.n_ghost; j++) {
+      for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) {
+        id = i + j * H.nx + k * H.nx * H.ny;
 
-        C.density[id] =  rho_gas_mean;
+        C.density[id]    = rho_gas_mean;
         C.momentum_x[id] = 0;
         C.momentum_y[id] = 0;
         C.momentum_z[id] = 0;
-        C.Energy[id] = U;
+        C.Energy[id]     = U;
 
-        #ifdef DE
+  #ifdef DE
         C.GasEnergy[id] = U;
-        #endif
-
-        #ifdef CHEMISTRY_GPU
-        C.HI_density[id]    =  rho_gas_mean * HI_frac;
-        C.HII_density[id]   =  rho_gas_mean * HII_frac;
-        C.HeI_density[id]   =  rho_gas_mean * HeI_frac;
-        C.HeII_density[id]  =  rho_gas_mean * HeII_frac;
-        C.HeIII_density[id] =  rho_gas_mean * HeIII_frac;
-        C.e_density[id]     =  rho_gas_mean * e_frac;
-        #endif
-
-
-        #ifdef COOLING_GRACKLE
-        C.scalar[0*H.n_cells + id] = rho_gas_mean * HI_frac;
-        C.scalar[1*H.n_cells + id] = rho_gas_mean * HII_frac;
-        C.scalar[2*H.n_cells + id] = rho_gas_mean * HeI_frac;
-        C.scalar[3*H.n_cells + id] = rho_gas_mean * HeII_frac;
-        C.scalar[4*H.n_cells + id] = rho_gas_mean * HeIII_frac;
-        C.scalar[5*H.n_cells + id] = rho_gas_mean * e_frac;
-        #ifdef GRACKLE_METALS
-        C.scalar[6*H.n_cells + id] = rho_gas_mean * metal_frac;
-        #endif
-        #endif
+  #endif
+
+  #ifdef CHEMISTRY_GPU
+        C.HI_density[id]    = rho_gas_mean * HI_frac;
+        C.HII_density[id]   = rho_gas_mean * HII_frac;
+        C.HeI_density[id]   = rho_gas_mean * HeI_frac;
+        C.HeII_density[id]  = rho_gas_mean * HeII_frac;
+        C.HeIII_density[id] = rho_gas_mean * HeIII_frac;
+        C.e_density[id]     = rho_gas_mean * e_frac;
+  #endif
+
+  #ifdef COOLING_GRACKLE
+        C.HI_density[id]    = rho_gas_mean * HI_frac;
+        C.HII_density[id]   = rho_gas_mean * HII_frac;
+        C.HeI_density[id]   = rho_gas_mean * HeI_frac;
+        C.HeII_density[id]  = rho_gas_mean * HeII_frac;
+        C.HeIII_density[id] = rho_gas_mean * HeIII_frac;
+        C.e_density[id]     = rho_gas_mean * e_frac;
+    #ifdef GRACKLE_METALS
+        C.metal_density[id] = rho_gas_mean * metal_frac;
+    #endif
+  #endif
+      }
+    }
+  }
 
+#else   // COSMOLOGY
+  chprintf("This requires COSMOLOGY turned on! \n");
+  chexit(-1);
+#endif  // COSMOLOGY
+}
 
+#ifdef MHD
+void Grid3D::Circularly_Polarized_Alfven_Wave(struct Parameters const P)
+{
+  // This test is only meaningful for a limited number of parameter values so I will check them here
+  assert(P.polarization == 1.0 or
+         P.polarization == -1.0 and
+             "The polarization for this test must be 1 (right polarized) or -1 (left polarized).");
+  assert(std::abs(P.vx) == 1.0 or
+         P.vx == 0.0 and "The x velocity for this test must be 0 (traveling wave) or 1 (standing wave).");
+
+  // Check the domain and angles
+  auto checkDomain = [](int const &nx, int const &ny, int const &nz, Real const &xlen, Real const &ylen,
+                        Real const &zlen) {
+    assert(nx == 2 * ny and nx == 2 * nz and "This test requires that the number of cells be of shape 2L x L x L");
+    assert(xlen == 2 * ylen and xlen == 2 * zlen and "This test requires that the domain be of shape 2L x L x L");
+  };
+  if ((P.pitch == 0.0 and P.yaw == 0.0) or (P.pitch == std::asin(2. / 3.) and P.yaw == std::asin(2. / std::sqrt(5.)))) {
+    checkDomain(P.nx, P.ny, P.nz, P.xlen, P.ylen, P.zlen);
+  } else if (P.pitch == 0.5 * M_PI and P.yaw == 0.0) {
+    checkDomain(P.ny, P.nz, P.nx, P.ylen, P.zlen, P.xlen);
+  } else if (P.pitch == 0.0 and P.yaw == 0.5 * M_PI) {
+    checkDomain(P.nz, P.nx, P.ny, P.zlen, P.xlen, P.ylen);
+  } else {
+    assert(false and "This test does not support these angles");
+  }
+
+  // Parameters for tests.
+  Real const density    = 1.0;
+  Real const pressure   = 0.1;
+  Real const velocity_x = P.vx;
+  Real const amplitude  = 0.1;  // the amplitude of the wave
+  Real const magnetic_x = 1.0;
+
+  // Angles
+  Real const sin_yaw   = std::sin(P.yaw);
+  Real const cos_yaw   = std::cos(P.yaw);
+  Real const sin_pitch = std::sin(P.pitch);
+  Real const cos_pitch = std::cos(P.pitch);
+
+  // Compute the wave quantities
+  Real const wavelength = 1.;
+  Real const wavenumber = 2.0 * M_PI / wavelength;  // the angular wave number k
+
+  // Compute the vector potentials
+  std::vector<Real> vectorPotential(3 * H.n_cells, 0);
+  auto Compute_Vector_Potential = [&](Real const &x_loc, Real const &y_loc, Real const &z_loc) {
+    // The "_rot" variables are the rotated version
+    Real const x_rot = x_loc * cos_pitch * cos_yaw + y_loc * cos_pitch * sin_yaw + z_loc * sin_pitch;
+    Real const y_rot = -x_loc * sin_yaw + y_loc * cos_yaw;
+
+    Real const a_y = P.polarization * (amplitude / wavenumber) * std::sin(wavenumber * x_rot);
+    Real const a_z = (amplitude / wavenumber) * std::cos(wavenumber * x_rot) + magnetic_x * y_rot;
+
+    return std::make_pair(a_y, a_z);
+  };
+
+  for (int k = 0; k < H.nz; k++) {
+    for (int j = 0; j < H.ny; j++) {
+      for (int i = 0; i < H.nx; i++) {
+        // Get cell index
+        int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny);
+
+        Real x, y, z;
+        Get_Position(i, j, k, &x, &y, &z);
+
+        auto vectorPot                         = Compute_Vector_Potential(x, y + H.dy / 2., z + H.dz / 2.);
+        vectorPotential.at(id + 0 * H.n_cells) = -vectorPot.first * sin_yaw - vectorPot.second * sin_pitch * cos_yaw;
+
+        vectorPot                              = Compute_Vector_Potential(x + H.dx / 2., y, z + H.dz / 2.);
+        vectorPotential.at(id + 1 * H.n_cells) = vectorPot.first * cos_yaw - vectorPot.second * sin_pitch * sin_yaw;
+
+        vectorPot                              = Compute_Vector_Potential(x + H.dx / 2., y + H.dy / 2., z);
+        vectorPotential.at(id + 2 * H.n_cells) = vectorPot.second * cos_pitch;
       }
     }
   }
 
-  #else //COSMOLOGY
-  chprintf( "This requires COSMOLOGY turned on! \n");
-  chexit(-1);
-  #endif //COSMOLOGY
+  // Compute the magnetic field
+  mhd::utils::Init_Magnetic_Field_With_Vector_Potential(H, C, vectorPotential);
+
+  // set initial values of non-magnetic conserved variables
+  for (int k = H.n_ghost - 1; k < H.nz - H.n_ghost; k++) {
+    for (int j = H.n_ghost - 1; j < H.ny - H.n_ghost; j++) {
+      for (int i = H.n_ghost - 1; i < H.nx - H.n_ghost; i++) {
+        // get cell index
+        int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny);
+
+        // get cell-centered position
+        Real x_pos, y_pos, z_pos;
+        Get_Position(i, j, k, &x_pos, &y_pos, &z_pos);
+        Real const x_pos_rot = x_pos * cos_pitch * cos_yaw + y_pos * cos_pitch * sin_yaw + z_pos * sin_pitch;
+
+        // Compute the momentum
+        Real const momentum_x = density * velocity_x;
+        Real const momentum_y = -P.polarization * density * amplitude * std::sin(wavenumber * x_pos_rot);
+        Real const momentum_z = -density * amplitude * std::cos(wavenumber * x_pos_rot);
+        Real const momentum_x_rot =
+            momentum_x * cos_pitch * cos_yaw - momentum_y * sin_yaw - momentum_z * sin_pitch * cos_yaw;
+        Real const momentum_y_rot =
+            momentum_x * cos_pitch * sin_yaw + momentum_y * cos_yaw - momentum_z * sin_pitch * sin_yaw;
+        Real const momentum_z_rot = momentum_x * sin_pitch + momentum_z * cos_pitch;
+
+        // Compute the Energy
+        auto const magnetic_centered =
+            mhd::utils::cellCenteredMagneticFields(C.host, id, i, j, k, H.n_cells, H.nx, H.ny);
+        Real const energy = hydro_utilities::Calc_Energy_Conserved(pressure, density, momentum_x_rot, momentum_y_rot,
+                                                                   momentum_z_rot, ::gama, magnetic_centered.x,
+                                                                   magnetic_centered.y, magnetic_centered.z);
+
+        // Final assignment
+        C.density[id]    = density;
+        C.momentum_x[id] = momentum_x_rot;
+        C.momentum_y[id] = momentum_y_rot;
+        C.momentum_z[id] = momentum_z_rot;
+        C.Energy[id]     = energy;
+      }
+    }
+  }
+}
+
+void Grid3D::Advecting_Field_Loop(struct Parameters const P)
+{
+  // This test is only meaningful for a limited number of parameter values so I will check them here
+  // Check that the domain is centered on zero
+  assert((P.xmin + P.xlen / 2) == 0 and (P.ymin + P.ylen / 2) == 0 and (P.zmin + P.zlen / 2 == 0) and
+         "Domain must be centered at zero");
+
+  // Check that P.radius is smaller than the size of the domain
+  Real const domain_size = std::hypot(P.xlen / 2, P.ylen / 2, P.zlen / 2);
+  assert(domain_size > P.radius and "The size of the domain must be greater than P.radius");
+
+  // Compute the vector potential. Since the vector potential std::vector is initialized to zero I will only assign new
+  // values when required and ignore the cases where I would be assigning zero
+  std::vector<Real> vectorPotential(3 * H.n_cells, 0);
+  for (int k = 0; k < H.nz; k++) {
+    for (int j = 0; j < H.ny; j++) {
+      for (int i = 0; i < H.nx; i++) {
+        // Get cell index
+        int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny);
+
+        // Get the cell centered positions
+        Real x, y, z;
+        Get_Position(i, j, k, &x, &y, &z);
+
+        // Y vector potential
+        Real radius = std::hypot(x + H.dx / 2., y, z + H.dz / 2.);
+        if (radius < P.radius) {
+          vectorPotential.at(id + 1 * H.n_cells) = P.A * (P.radius - radius);
+        }
+
+        // Z vector potential
+        radius = std::hypot(x + H.dx / 2., y + H.dy / 2., z);
+        if (radius < P.radius) {
+          vectorPotential.at(id + 2 * H.n_cells) = P.A * (P.radius - radius);
+        }
+      }
+    }
+  }
 
+  // Initialize the magnetic fields
+  mhd::utils::Init_Magnetic_Field_With_Vector_Potential(H, C, vectorPotential);
+
+  // Initialize the hydro variables
+  for (int k = H.n_ghost - 1; k < H.nz - H.n_ghost; k++) {
+    for (int j = H.n_ghost - 1; j < H.ny - H.n_ghost; j++) {
+      for (int i = H.n_ghost - 1; i < H.nx - H.n_ghost; i++) {
+        // get cell index
+        int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny);
+
+        // Compute the cell centered magnetic fields
+        auto const magnetic_centered =
+            mhd::utils::cellCenteredMagneticFields(C.host, id, i, j, k, H.n_cells, H.nx, H.ny);
+
+        // Assignment
+        C.density[id]    = P.rho;
+        C.momentum_x[id] = P.rho * P.vx;
+        C.momentum_y[id] = P.rho * P.vy;
+        C.momentum_z[id] = P.rho * P.vz;
+        C.Energy[id]     = hydro_utilities::Calc_Energy_Conserved(P.P, P.rho, C.momentum_x[id], C.momentum_y[id],
+                                                                  C.momentum_z[id], ::gama, magnetic_centered.x,
+                                                                  magnetic_centered.y, magnetic_centered.z);
+      }
+    }
+  }
 }
 
+void Grid3D::MHD_Spherical_Blast(struct Parameters const P)
+{
+  // This test is only meaningful for a limited number of parameter values so I will check them here
+  // Check that the domain is centered on zero
+  assert((P.xmin + P.xlen / 2) == 0 and (P.ymin + P.ylen / 2) == 0 and (P.zmin + P.zlen / 2 == 0) and
+         "Domain must be centered at zero");
+
+  // Check that P.radius is smaller than the size of the domain
+  Real const domain_size = std::hypot(P.xlen / 2, P.ylen / 2, P.zlen / 2);
+  assert(domain_size > P.radius and "The size of the domain must be greater than P.radius");
+
+  // Initialize the magnetic field
+  for (int k = H.n_ghost - 1; k < H.nz - H.n_ghost; k++) {
+    for (int j = H.n_ghost - 1; j < H.ny - H.n_ghost; j++) {
+      for (int i = H.n_ghost - 1; i < H.nx - H.n_ghost; i++) {
+        // get cell index
+        int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny);
+
+        C.magnetic_x[id] = P.Bx;
+        C.magnetic_y[id] = P.By;
+        C.magnetic_z[id] = P.Bz;
+      }
+    }
+  }
 
+  for (int k = H.n_ghost - 1; k < H.nz - H.n_ghost; k++) {
+    for (int j = H.n_ghost - 1; j < H.ny - H.n_ghost; j++) {
+      for (int i = H.n_ghost - 1; i < H.nx - H.n_ghost; i++) {
+        // get cell index
+        int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny);
+
+        // Set the fields that don't depend on pressure
+        C.density[id]    = P.rho;
+        C.momentum_x[id] = P.rho * P.vx;
+        C.momentum_y[id] = P.rho * P.vy;
+        C.momentum_z[id] = P.rho * P.vz;
+
+        // Get the cell centered positions
+        Real x, y, z;
+        Get_Position(i, j, k, &x, &y, &z);
+
+        // Compute the magnetic field in this cell
+        auto const magnetic_centered =
+            mhd::utils::cellCenteredMagneticFields(C.host, id, i, j, k, H.n_cells, H.nx, H.ny);
+
+        // Set the field(s) that do depend on pressure. That's just energy
+        Real const radius = std::hypot(x, y, z);
+        Real pressure;
+        if (radius < P.radius) {
+          pressure = P.P_blast;
+        } else {
+          pressure = P.P;
+        }
+        C.Energy[id] = hydro_utilities::Calc_Energy_Conserved(
+            pressure, C.density[id], C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], ::gama, magnetic_centered.x,
+            magnetic_centered.y, magnetic_centered.z);
+      }
+    }
+  }
+}
 
+void Grid3D::Orszag_Tang_Vortex()
+{
+  // This problem requires specific parameters so I will define them here
+  Real const magnetic_background = 1.0 / std::sqrt(4.0 * M_PI);
+  Real const density_background  = 25.0 / (36.0 * M_PI);
+  Real const velocity_background = 1.0;
+  Real const pressure_background = 5.0 / (12.0 * M_PI);
+
+  // Compute the vector potential. Since the vector potential std::vector is initialized to zero I will only assign new
+  // values when required and ignore the cases where I would be assigning zero
+  std::vector<Real> vectorPotential(3 * H.n_cells, 0);
+  for (int k = 0; k < H.nz; k++) {
+    for (int j = 0; j < H.ny; j++) {
+      for (int i = 0; i < H.nx; i++) {
+        // Get cell index
+        int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny);
+
+        // Get the cell centered positions
+        Real x, y, z;
+        Get_Position(i, j, k, &x, &y, &z);
+
+        // Z vector potential
+        vectorPotential.at(id + 2 * H.n_cells) =
+            magnetic_background / (4.0 * M_PI) * (std::cos(4.0 * M_PI * x) + 2.0 * std::cos(2.0 * M_PI * y));
+      }
+    }
+  }
 
+  // Initialize the magnetic fields
+  mhd::utils::Init_Magnetic_Field_With_Vector_Potential(H, C, vectorPotential);
 
+  // Initialize the hydro variables
+  for (int k = H.n_ghost - 1; k < H.nz - H.n_ghost; k++) {
+    for (int j = H.n_ghost - 1; j < H.ny - H.n_ghost; j++) {
+      for (int i = H.n_ghost - 1; i < H.nx - H.n_ghost; i++) {
+        // get cell index
+        int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny);
+
+        // Get the cell centered positions
+        Real x, y, z;
+        Get_Position(i, j, k, &x, &y, &z);
+
+        // Compute the cell centered magnetic fields
+        auto const magnetic_centered =
+            mhd::utils::cellCenteredMagneticFields(C.host, id, i, j, k, H.n_cells, H.nx, H.ny);
+
+        // Assignment
+        C.density[id]    = density_background;
+        C.momentum_x[id] = density_background * velocity_background * std::sin(2.0 * M_PI * y);
+        C.momentum_y[id] = -density_background * velocity_background * std::sin(2.0 * M_PI * x);
+        C.momentum_z[id] = 0.0;
+        C.Energy[id]     = hydro_utilities::Calc_Energy_Conserved(
+            pressure_background, C.density[id], C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], ::gama,
+            magnetic_centered.x, magnetic_centered.y, magnetic_centered.z);
+      }
+    }
+  }
+}
+#endif  // MHD
diff --git a/src/grid/mpi_boundaries.cpp b/src/grid/mpi_boundaries.cpp
index 2d4c40bf5..747bcd6ec 100644
--- a/src/grid/mpi_boundaries.cpp
+++ b/src/grid/mpi_boundaries.cpp
@@ -1,47 +1,44 @@
-#include "../grid/grid3D.h"
-#include "../mpi/mpi_routines.h"
-#include "../io/io.h"
-#include "../utils/error_handling.h"
 #include <iostream>
 
+#include "../global/global_cuda.h"    //provides TPB
+#include "../grid/cuda_boundaries.h"  // provides PackBuffers3D and UnpackBuffers3D
+#include "../io/io.h"
+#include "../mpi/mpi_routines.h"
+#include "../utils/error_handling.h"
 #include "../utils/gpu.hpp"
-#include "../global/global_cuda.h"//provides TPB
-#include "../grid/cuda_boundaries.h"// provides PackBuffers3D and UnpackBuffers3D
+#include "grid3D.h"
 
 #ifdef MPI_CHOLLA
 
-void Grid3D::Set_Boundaries_MPI(struct parameters P)
+void Grid3D::Set_Boundaries_MPI(struct Parameters P)
 {
-  int flags[6] = {0,0,0,0,0,0};
+  int flags[6] = {0, 0, 0, 0, 0, 0};
 
-  if(Check_Custom_Boundary(&flags[0],P))
-  {
-    //perform custom boundaries
+  if (Check_Custom_Boundary(&flags[0], P)) {
+    // perform custom boundaries
     Custom_Boundary(P.custom_bcnd);
   }
 
-  Set_Boundaries_MPI_BLOCK(flags,P);
+  Set_Boundaries_MPI_BLOCK(flags, P);
 
   #ifdef GRAVITY
-  Grav.Set_Boundary_Flags( flags );
+  Grav.Set_Boundary_Flags(flags);
   #endif
-
 }
 
-void Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P)
+void Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct Parameters P)
 {
   #ifdef PARTICLES
   // Clear the vectors that contain the particles IDs to be transfred
-  if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){
+  if (Particles.TRANSFER_PARTICLES_BOUNDARIES) {
     Particles.Clear_Particles_For_Transfer();
-    Particles.Select_Particles_to_Transfer_All( flags );
+    Particles.Select_Particles_to_Transfer_All(flags);
   }
   #endif
 
   if (H.nx > 1) {
-
     /* Step 1 - Send MPI x-boundaries */
-    if (flags[0]==5 || flags[1]==5) {
+    if (flags[0] == 5 || flags[1] == 5) {
       Load_and_Send_MPI_Comm_Buffers(0, flags);
     }
 
@@ -51,20 +48,20 @@ void Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P)
 
     /* Step 3 - Receive MPI x-boundaries */
 
-    if (flags[0]==5 || flags[1]==5) {
+    if (flags[0] == 5 || flags[1] == 5) {
       Wait_and_Unload_MPI_Comm_Buffers(0, flags);
-      #ifdef PARTICLES
+  #ifdef PARTICLES
       // Unload Particles buffers when transfering Particles
-      if (Particles.TRANSFER_PARTICLES_BOUNDARIES) Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(0, flags);
-      #endif
+      if (Particles.TRANSFER_PARTICLES_BOUNDARIES) {
+        Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(0, flags);
+      }
+  #endif
     }
-
   }
   MPI_Barrier(world);
   if (H.ny > 1) {
-
     /* Step 4 - Send MPI y-boundaries */
-    if (flags[2]==5 || flags[3]==5) {
+    if (flags[2] == 5 || flags[3] == 5) {
       Load_and_Send_MPI_Comm_Buffers(1, flags);
     }
 
@@ -73,19 +70,20 @@ void Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P)
     Set_Boundaries(3, flags);
 
     /* Step 6 - Receive MPI y-boundaries */
-    if (flags[2]==5 || flags[3]==5) {
+    if (flags[2] == 5 || flags[3] == 5) {
       Wait_and_Unload_MPI_Comm_Buffers(1, flags);
-      #ifdef PARTICLES
+  #ifdef PARTICLES
       // Unload Particles buffers when transfering Particles
-      if (Particles.TRANSFER_PARTICLES_BOUNDARIES) Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(1, flags);
-      #endif
+      if (Particles.TRANSFER_PARTICLES_BOUNDARIES) {
+        Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(1, flags);
+      }
+  #endif
     }
   }
   MPI_Barrier(world);
   if (H.nz > 1) {
-
     /* Step 7 - Send MPI z-boundaries */
-    if (flags[4]==5 || flags[5]==5) {
+    if (flags[4] == 5 || flags[5] == 5) {
       Load_and_Send_MPI_Comm_Buffers(2, flags);
     }
 
@@ -94,789 +92,758 @@ void Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P)
     Set_Boundaries(5, flags);
 
     /* Step 9 - Receive MPI z-boundaries */
-    if (flags[4]==5 || flags[5]==5) {
+    if (flags[4] == 5 || flags[5] == 5) {
       Wait_and_Unload_MPI_Comm_Buffers(2, flags);
-      #ifdef PARTICLES
+  #ifdef PARTICLES
       // Unload Particles buffers when transfering Particles
-      if (Particles.TRANSFER_PARTICLES_BOUNDARIES) Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(2, flags);
-      #endif
+      if (Particles.TRANSFER_PARTICLES_BOUNDARIES) {
+        Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(2, flags);
+      }
+  #endif
     }
   }
 
   #ifdef PARTICLES
-  if ( Particles.TRANSFER_PARTICLES_BOUNDARIES)  Finish_Particles_Transfer();
+  if (Particles.TRANSFER_PARTICLES_BOUNDARIES) {
+    Finish_Particles_Transfer();
+  }
   #endif
-
 }
 
-
-int Grid3D::Load_Hydro_DeviceBuffer_X0 ( Real *send_buffer_x0 ){
-
+int Grid3D::Load_Hydro_DeviceBuffer_X0(Real *send_buffer_x0)
+{
   // 1D
   if (H.ny == 1 && H.nz == 1) {
     int idxoffset = H.n_ghost;
-    PackBuffers3D(send_buffer_x0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,1,1);
+    PackBuffers3D(send_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, 1, 1);
   }
   // 2D
   if (H.ny > 1 && H.nz == 1) {
-    int idxoffset = H.n_ghost + H.n_ghost*H.nx;
-    PackBuffers3D(send_buffer_x0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,H.ny-2*H.n_ghost,1);
+    int idxoffset = H.n_ghost + H.n_ghost * H.nx;
+    PackBuffers3D(send_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost,
+                  H.ny - 2 * H.n_ghost, 1);
   }
   // 3D
   if (H.ny > 1 && H.nz > 1) {
-    int idxoffset = H.n_ghost + H.n_ghost*H.nx + H.n_ghost*H.nx*H.ny;
-    PackBuffers3D(send_buffer_x0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,H.ny-2*H.n_ghost,H.nz-2*H.n_ghost);
+    int idxoffset = H.n_ghost + H.n_ghost * H.nx + H.n_ghost * H.nx * H.ny;
+    PackBuffers3D(send_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost,
+                  H.ny - 2 * H.n_ghost, H.nz - 2 * H.n_ghost);
   }
 
   return x_buffer_length;
 }
 
-
 // load right x communication buffer
-int Grid3D::Load_Hydro_DeviceBuffer_X1 ( Real *send_buffer_x1 ){
-
+int Grid3D::Load_Hydro_DeviceBuffer_X1(Real *send_buffer_x1)
+{
   // 1D
   if (H.ny == 1 && H.nz == 1) {
-    int idxoffset = H.nx-2*H.n_ghost;
-    PackBuffers3D(send_buffer_x1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,1,1);
+    int idxoffset = H.nx - 2 * H.n_ghost;
+    PackBuffers3D(send_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, 1, 1);
   }
   // 2D
   if (H.ny > 1 && H.nz == 1) {
-    int idxoffset = H.nx-2*H.n_ghost + H.n_ghost*H.nx;
-    PackBuffers3D(send_buffer_x1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,H.ny-2*H.n_ghost,1);
+    int idxoffset = H.nx - 2 * H.n_ghost + H.n_ghost * H.nx;
+    PackBuffers3D(send_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost,
+                  H.ny - 2 * H.n_ghost, 1);
   }
   // 3D
   if (H.ny > 1 && H.nz > 1) {
-    int idxoffset = H.nx-2*H.n_ghost + H.n_ghost*H.nx + H.n_ghost*H.nx*H.ny;
-    PackBuffers3D(send_buffer_x1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,H.ny-2*H.n_ghost,H.nz-2*H.n_ghost);
+    int idxoffset = H.nx - 2 * H.n_ghost + H.n_ghost * H.nx + H.n_ghost * H.nx * H.ny;
+    PackBuffers3D(send_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost,
+                  H.ny - 2 * H.n_ghost, H.nz - 2 * H.n_ghost);
   }
 
   return x_buffer_length;
 }
 
 // load left y communication buffer
-int Grid3D::Load_Hydro_DeviceBuffer_Y0 ( Real *send_buffer_y0 ){
-
+int Grid3D::Load_Hydro_DeviceBuffer_Y0(Real *send_buffer_y0)
+{
   // 2D
   if (H.nz == 1) {
-    int idxoffset = H.n_ghost*H.nx;
-    PackBuffers3D(send_buffer_y0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.n_ghost,1);
+    int idxoffset = H.n_ghost * H.nx;
+    PackBuffers3D(send_buffer_y0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.n_ghost, 1);
   }
   // 3D
   if (H.nz > 1) {
-    int idxoffset = H.n_ghost*H.nx + H.n_ghost*H.nx*H.ny;
-    PackBuffers3D(send_buffer_y0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.n_ghost,H.nz-2*H.n_ghost);
+    int idxoffset = H.n_ghost * H.nx + H.n_ghost * H.nx * H.ny;
+    PackBuffers3D(send_buffer_y0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.n_ghost,
+                  H.nz - 2 * H.n_ghost);
   }
 
   return y_buffer_length;
 }
 
-int Grid3D::Load_Hydro_DeviceBuffer_Y1 ( Real *send_buffer_y1 ){
-
+int Grid3D::Load_Hydro_DeviceBuffer_Y1(Real *send_buffer_y1)
+{
   // 2D
   if (H.nz == 1) {
-    int idxoffset = (H.ny-2*H.n_ghost)*H.nx;
-    PackBuffers3D(send_buffer_y1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.n_ghost,1);
+    int idxoffset = (H.ny - 2 * H.n_ghost) * H.nx;
+    PackBuffers3D(send_buffer_y1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.n_ghost, 1);
   }
   // 3D
   if (H.nz > 1) {
-    int idxoffset = (H.ny-2*H.n_ghost)*H.nx + H.n_ghost*H.nx*H.ny;
-    PackBuffers3D(send_buffer_y1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.n_ghost,H.nz-2*H.n_ghost);
+    int idxoffset = (H.ny - 2 * H.n_ghost) * H.nx + H.n_ghost * H.nx * H.ny;
+    PackBuffers3D(send_buffer_y1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.n_ghost,
+                  H.nz - 2 * H.n_ghost);
   }
 
   return y_buffer_length;
-
 }
 
 // load left z communication buffer
-int Grid3D::Load_Hydro_DeviceBuffer_Z0 ( Real *send_buffer_z0 ){
-
+int Grid3D::Load_Hydro_DeviceBuffer_Z0(Real *send_buffer_z0)
+{
   // 3D
-  int idxoffset = H.n_ghost*H.nx*H.ny;
-  PackBuffers3D(send_buffer_z0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.ny,H.n_ghost);
+  int idxoffset = H.n_ghost * H.nx * H.ny;
+  PackBuffers3D(send_buffer_z0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.ny, H.n_ghost);
 
   return z_buffer_length;
 }
 
-int Grid3D::Load_Hydro_DeviceBuffer_Z1 ( Real *send_buffer_z1 ){
-
+int Grid3D::Load_Hydro_DeviceBuffer_Z1(Real *send_buffer_z1)
+{
   // 3D
-  int idxoffset = (H.nz-2*H.n_ghost)*H.nx*H.ny;
-  PackBuffers3D(send_buffer_z1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.ny,H.n_ghost);
+  int idxoffset = (H.nz - 2 * H.n_ghost) * H.nx * H.ny;
+  PackBuffers3D(send_buffer_z1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.ny, H.n_ghost);
 
   return z_buffer_length;
 }
 
-void Grid3D::Unload_Hydro_DeviceBuffer_X0 ( Real *recv_buffer_x0 ) {
-
+void Grid3D::Unload_Hydro_DeviceBuffer_X0(Real *recv_buffer_x0)
+{
   // 1D
   if (H.ny == 1 && H.nz == 1) {
     int idxoffset = 0;
-    UnpackBuffers3D(recv_buffer_x0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,1,1);
+    UnpackBuffers3D(recv_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, 1, 1);
   }
   // 2D
   if (H.ny > 1 && H.nz == 1) {
-    int idxoffset = H.n_ghost*H.nx;
-    UnpackBuffers3D(recv_buffer_x0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,H.ny-2*H.n_ghost,1);
+    int idxoffset = H.n_ghost * H.nx;
+    UnpackBuffers3D(recv_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost,
+                    H.ny - 2 * H.n_ghost, 1);
   }
   // 3D
   if (H.nz > 1) {
-    int idxoffset = H.n_ghost*(H.nx+H.nx*H.ny);
-    UnpackBuffers3D(recv_buffer_x0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,H.ny-2*H.n_ghost,H.nz-2*H.n_ghost);
+    int idxoffset = H.n_ghost * (H.nx + H.nx * H.ny);
+    UnpackBuffers3D(recv_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost,
+                    H.ny - 2 * H.n_ghost, H.nz - 2 * H.n_ghost);
   }
-
 }
 
-void Grid3D::Unload_Hydro_DeviceBuffer_X1 ( Real *recv_buffer_x1 ) {
-
+void Grid3D::Unload_Hydro_DeviceBuffer_X1(Real *recv_buffer_x1)
+{
   // 1D
   if (H.ny == 1 && H.nz == 1) {
     int idxoffset = H.nx - H.n_ghost;
-    UnpackBuffers3D(recv_buffer_x1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,1,1);
+    UnpackBuffers3D(recv_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, 1, 1);
   }
   // 2D
   if (H.ny > 1 && H.nz == 1) {
-    int idxoffset = H.nx - H.n_ghost + H.n_ghost*H.nx;
-    UnpackBuffers3D(recv_buffer_x1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,H.ny-2*H.n_ghost,1);
+    int idxoffset = H.nx - H.n_ghost + H.n_ghost * H.nx;
+    UnpackBuffers3D(recv_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost,
+                    H.ny - 2 * H.n_ghost, 1);
   }
   // 3D
   if (H.nz > 1) {
-    int idxoffset = H.nx - H.n_ghost + H.n_ghost*(H.nx+H.nx*H.ny);
-    UnpackBuffers3D(recv_buffer_x1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,H.ny-2*H.n_ghost,H.nz-2*H.n_ghost);
+    int idxoffset = H.nx - H.n_ghost + H.n_ghost * (H.nx + H.nx * H.ny);
+    UnpackBuffers3D(recv_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost,
+                    H.ny - 2 * H.n_ghost, H.nz - 2 * H.n_ghost);
   }
-
 }
 
-
-void Grid3D::Unload_Hydro_DeviceBuffer_Y0 ( Real *recv_buffer_y0 ) {
-
+void Grid3D::Unload_Hydro_DeviceBuffer_Y0(Real *recv_buffer_y0)
+{
   // 2D
   if (H.nz == 1) {
     int idxoffset = 0;
-    UnpackBuffers3D(recv_buffer_y0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.n_ghost,1);
+    UnpackBuffers3D(recv_buffer_y0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.n_ghost, 1);
   }
   // 3D
   if (H.nz > 1) {
-    int idxoffset = H.n_ghost*H.nx*H.ny;
-    UnpackBuffers3D(recv_buffer_y0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.n_ghost,H.nz-2*H.n_ghost);
+    int idxoffset = H.n_ghost * H.nx * H.ny;
+    UnpackBuffers3D(recv_buffer_y0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.n_ghost,
+                    H.nz - 2 * H.n_ghost);
   }
-
 }
 
-
-void Grid3D::Unload_Hydro_DeviceBuffer_Y1 ( Real *recv_buffer_y1 ) {
-
+void Grid3D::Unload_Hydro_DeviceBuffer_Y1(Real *recv_buffer_y1)
+{
   // 2D
   if (H.nz == 1) {
-    int idxoffset = (H.ny-H.n_ghost)*H.nx;
-    UnpackBuffers3D(recv_buffer_y1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.n_ghost,1);
+    int idxoffset = (H.ny - H.n_ghost) * H.nx;
+    UnpackBuffers3D(recv_buffer_y1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.n_ghost, 1);
   }
   // 3D
   if (H.nz > 1) {
-    int idxoffset = (H.ny-H.n_ghost)*H.nx + H.n_ghost*H.nx*H.ny;
-    UnpackBuffers3D(recv_buffer_y1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.n_ghost,H.nz-2*H.n_ghost);
+    int idxoffset = (H.ny - H.n_ghost) * H.nx + H.n_ghost * H.nx * H.ny;
+    UnpackBuffers3D(recv_buffer_y1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.n_ghost,
+                    H.nz - 2 * H.n_ghost);
   }
-
 }
 
-
-
-void Grid3D::Unload_Hydro_DeviceBuffer_Z0 ( Real *recv_buffer_z0 ) {
-
+void Grid3D::Unload_Hydro_DeviceBuffer_Z0(Real *recv_buffer_z0)
+{
   // 3D
   int idxoffset = 0;
-  UnpackBuffers3D(recv_buffer_z0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.ny,H.n_ghost);
+  UnpackBuffers3D(recv_buffer_z0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.ny, H.n_ghost);
 }
 
-
-void Grid3D::Unload_Hydro_DeviceBuffer_Z1 ( Real *recv_buffer_z1 ) {
-
+void Grid3D::Unload_Hydro_DeviceBuffer_Z1(Real *recv_buffer_z1)
+{
   // 3D
-  int idxoffset = (H.nz-H.n_ghost)*H.nx*H.ny;
-  UnpackBuffers3D(recv_buffer_z1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.ny,H.n_ghost);
+  int idxoffset = (H.nz - H.n_ghost) * H.nx * H.ny;
+  UnpackBuffers3D(recv_buffer_z1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.ny, H.n_ghost);
 }
 
 void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags)
 {
-
   #ifdef PARTICLES
   // Select which particles need to be transfred for this direction
-  // if ( Particles.TRANSFER_PARTICLES_BOUNDARIES) Particles.Select_Particles_to_Transfer( dir );
+  // if ( Particles.TRANSFER_PARTICLES_BOUNDARIES)
+  // Particles.Select_Particles_to_Transfer( dir );
 
   // Initialize MPI requests for particles transfers
   int ireq_n_particles, ireq_particles_transfer;
-  ireq_n_particles = 0;
+  ireq_n_particles        = 0;
   ireq_particles_transfer = 0;
   #endif
 
   int ireq;
   ireq = 0;
 
-  int xbsize = x_buffer_length,
-      ybsize = y_buffer_length,
-      zbsize = z_buffer_length;
+  int xbsize = x_buffer_length, ybsize = y_buffer_length, zbsize = z_buffer_length;
 
   int buffer_length;
 
-  // Flag to omit the transfer of the main buffer when tranferring the particles buffer
+  // Flag to omit the transfer of the main buffer when tranferring the particles
+  // buffer
   bool transfer_main_buffer = true;
 
   /* x boundaries */
-  if(dir == 0)
-  {
-    if (flags[0]==5) {
-
+  if (dir == 0) {
+    if (flags[0] == 5) {
       // load left x communication buffer
-      if ( H.TRANSFER_HYDRO_BOUNDARIES )
-        {
+      if (H.TRANSFER_HYDRO_BOUNDARIES) {
         buffer_length = Load_Hydro_DeviceBuffer_X0(d_send_buffer_x0);
-          #ifndef MPI_GPU
-          cudaMemcpy(h_send_buffer_x0, d_send_buffer_x0, xbsize*sizeof(Real),
-                     cudaMemcpyDeviceToHost);
-          #endif
-        }
-
-      #ifdef GRAVITY
-      if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){
-        #ifdef GRAVITY_GPU
-        buffer_length = Load_Gravity_Potential_To_Buffer_GPU( 0, 0, d_send_buffer_x0, 0 );
-          #ifndef MPI_GPU
-          cudaMemcpy(h_send_buffer_x0, d_send_buffer_x0, xbsize*sizeof(Real),
-                     cudaMemcpyDeviceToHost);
-          #endif
-        #else
-        buffer_length = Load_Gravity_Potential_To_Buffer( 0, 0, h_send_buffer_x0, 0 );
-        #endif
-
+  #ifndef MPI_GPU
+        cudaMemcpy(h_send_buffer_x0, d_send_buffer_x0, xbsize * sizeof(Real), cudaMemcpyDeviceToHost);
+  #endif
       }
-      #ifdef SOR
-      if ( Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES )  buffer_length = Load_Poisson_Boundary_To_Buffer( 0, 0, h_send_buffer_x0 );
-      #endif //SOR
-      #endif //GRAVITY
-
-      #ifdef PARTICLES
-      if ( Particles.TRANSFER_DENSITY_BOUNDARIES) {
-        #ifdef PARTICLES_GPU
-        buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( 0, 0, d_send_buffer_x0  );
-          #ifndef MPI_GPU
-          cudaMemcpy(h_send_buffer_x0, d_send_buffer_x0, xbsize*sizeof(Real),
-                     cudaMemcpyDeviceToHost);
-          #endif
-        #else
-          #ifndef MPI_GPU 
-          buffer_length = Load_Particles_Density_Boundary_to_Buffer( 0, 0, h_send_buffer_x0  );
-          #else
-          buffer_length = Load_Particles_Density_Boundary_to_Buffer( 0, 0, h_send_buffer_x0_particles  );
-          cudaMemcpy(d_send_buffer_x0, h_send_buffer_x0_particles, buffer_length*sizeof(Real), cudaMemcpyHostToDevice);
-          #endif
-        #endif
+
+  #ifdef GRAVITY
+      if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) {
+    #ifdef GRAVITY_GPU
+        buffer_length = Load_Gravity_Potential_To_Buffer_GPU(0, 0, d_send_buffer_x0, 0);
+      #ifndef MPI_GPU
+        cudaMemcpy(h_send_buffer_x0, d_send_buffer_x0, xbsize * sizeof(Real), cudaMemcpyDeviceToHost);
+      #endif
+    #else
+        buffer_length = Load_Gravity_Potential_To_Buffer(0, 0, h_send_buffer_x0, 0);
+    #endif
       }
-      else if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){
-        Load_and_Send_Particles_X0( ireq_n_particles, ireq_particles_transfer );
+    #ifdef SOR
+      if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES)
+        buffer_length = Load_Poisson_Boundary_To_Buffer(0, 0, h_send_buffer_x0);
+    #endif  // SOR
+  #endif    // GRAVITY
+
+  #ifdef PARTICLES
+      if (Particles.TRANSFER_DENSITY_BOUNDARIES) {
+    #ifdef PARTICLES_GPU
+        buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU(0, 0, d_send_buffer_x0);
+      #ifndef MPI_GPU
+        cudaMemcpy(h_send_buffer_x0, d_send_buffer_x0, xbsize * sizeof(Real), cudaMemcpyDeviceToHost);
+      #endif
+    #else
+      #ifndef MPI_GPU
+        buffer_length = Load_Particles_Density_Boundary_to_Buffer(0, 0, h_send_buffer_x0);
+      #else
+        buffer_length = Load_Particles_Density_Boundary_to_Buffer(0, 0, h_send_buffer_x0_particles);
+        cudaMemcpy(d_send_buffer_x0, h_send_buffer_x0_particles, buffer_length * sizeof(Real), cudaMemcpyHostToDevice);
+      #endif
+    #endif
+      } else if (Particles.TRANSFER_PARTICLES_BOUNDARIES) {
+        Load_and_Send_Particles_X0(ireq_n_particles, ireq_particles_transfer);
         transfer_main_buffer = false;
-        ireq_n_particles ++;
-        ireq_particles_transfer ++;
+        ireq_n_particles++;
+        ireq_particles_transfer++;
       }
-      #endif
+  #endif
+
+      if (transfer_main_buffer) {
+  #if defined(MPI_GPU)
+        // post non-blocking receive left x communication buffer
+        MPI_Irecv(d_recv_buffer_x0, buffer_length, MPI_CHREAL, source[0], 0, world, &recv_request[ireq]);
 
-      if ( transfer_main_buffer ){
-        #if defined(MPI_GPU)
-        //post non-blocking receive left x communication buffer
-        MPI_Irecv(d_recv_buffer_x0, buffer_length, MPI_CHREAL, source[0], 0,
-                  world, &recv_request[ireq]);
-
-        //non-blocking send left x communication buffer
-        MPI_Isend(d_send_buffer_x0, buffer_length, MPI_CHREAL, dest[0], 1,
-                  world, &send_request[0]);
-        #else
-        //post non-blocking receive left x communication buffer
-        MPI_Irecv(h_recv_buffer_x0, buffer_length, MPI_CHREAL, source[0], 0,
-                  world, &recv_request[ireq]);
-
-        //non-blocking send left x communication buffer
-        MPI_Isend(h_send_buffer_x0, buffer_length, MPI_CHREAL, dest[0], 1,
-                  world, &send_request[0]);
-        #endif
+        // non-blocking send left x communication buffer
+        MPI_Isend(d_send_buffer_x0, buffer_length, MPI_CHREAL, dest[0], 1, world, &send_request[0]);
+  #else
+        // post non-blocking receive left x communication buffer
+        MPI_Irecv(h_recv_buffer_x0, buffer_length, MPI_CHREAL, source[0], 0, world, &recv_request[ireq]);
+
+        // non-blocking send left x communication buffer
+        MPI_Isend(h_send_buffer_x0, buffer_length, MPI_CHREAL, dest[0], 1, world, &send_request[0]);
+  #endif
         MPI_Request_free(send_request);
 
-        //keep track of how many sends and receives are expected
+        // keep track of how many sends and receives are expected
         ireq++;
       }
     }
 
-    if(flags[1]==5)
-    {
+    if (flags[1] == 5) {
       // load right x communication buffer
-      if ( H.TRANSFER_HYDRO_BOUNDARIES )
-        {
+      if (H.TRANSFER_HYDRO_BOUNDARIES) {
         buffer_length = Load_Hydro_DeviceBuffer_X1(d_send_buffer_x1);
-          #ifndef MPI_GPU
-          cudaMemcpy(h_send_buffer_x1, d_send_buffer_x1, xbsize*sizeof(Real),
-                     cudaMemcpyDeviceToHost);
-          #endif
-        //printf("X1 len: %d\n", buffer_length);
-        }
-
-      #ifdef GRAVITY
-      if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){
-        #ifdef GRAVITY_GPU
-        buffer_length = Load_Gravity_Potential_To_Buffer_GPU( 0, 1, d_send_buffer_x1, 0 );
-          #ifndef MPI_GPU
-          cudaMemcpy(h_send_buffer_x1, d_send_buffer_x1, xbsize*sizeof(Real),
-                     cudaMemcpyDeviceToHost);
-          #endif
-        #else
-        buffer_length = Load_Gravity_Potential_To_Buffer( 0, 1, h_send_buffer_x1, 0 );
-        #endif
+  #ifndef MPI_GPU
+        cudaMemcpy(h_send_buffer_x1, d_send_buffer_x1, xbsize * sizeof(Real), cudaMemcpyDeviceToHost);
+  #endif
+        // printf("X1 len: %d\n", buffer_length);
       }
-      #ifdef SOR
-      if ( Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES )  buffer_length = Load_Poisson_Boundary_To_Buffer( 0, 1, h_send_buffer_x1 );
-      #endif //SOR
-      #endif //GRAVITY
-
-      #ifdef PARTICLES
-      if ( Particles.TRANSFER_DENSITY_BOUNDARIES) {
-        #ifdef PARTICLES_GPU
-        buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( 0, 1, d_send_buffer_x1  );
-          #ifndef MPI_GPU
-          cudaMemcpy(h_send_buffer_x1, d_send_buffer_x1, xbsize*sizeof(Real),
-                     cudaMemcpyDeviceToHost);
-          #endif
-        #else
-          #ifndef MPI_GPU 
-          buffer_length = Load_Particles_Density_Boundary_to_Buffer( 0, 1, h_send_buffer_x1  );
-          #else
-          buffer_length = Load_Particles_Density_Boundary_to_Buffer( 0, 1, h_send_buffer_x1_particles  );
-          cudaMemcpy(d_send_buffer_x1, h_send_buffer_x1_particles, buffer_length*sizeof(Real), cudaMemcpyHostToDevice);
-          #endif
-        #endif
+
+  #ifdef GRAVITY
+      if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) {
+    #ifdef GRAVITY_GPU
+        buffer_length = Load_Gravity_Potential_To_Buffer_GPU(0, 1, d_send_buffer_x1, 0);
+      #ifndef MPI_GPU
+        cudaMemcpy(h_send_buffer_x1, d_send_buffer_x1, xbsize * sizeof(Real), cudaMemcpyDeviceToHost);
+      #endif
+    #else
+        buffer_length = Load_Gravity_Potential_To_Buffer(0, 1, h_send_buffer_x1, 0);
+    #endif
       }
-      else if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){
-        Load_and_Send_Particles_X1( ireq_n_particles, ireq_particles_transfer );
+    #ifdef SOR
+      if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES)
+        buffer_length = Load_Poisson_Boundary_To_Buffer(0, 1, h_send_buffer_x1);
+    #endif  // SOR
+  #endif    // GRAVITY
+
+  #ifdef PARTICLES
+      if (Particles.TRANSFER_DENSITY_BOUNDARIES) {
+    #ifdef PARTICLES_GPU
+        buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU(0, 1, d_send_buffer_x1);
+      #ifndef MPI_GPU
+        cudaMemcpy(h_send_buffer_x1, d_send_buffer_x1, xbsize * sizeof(Real), cudaMemcpyDeviceToHost);
+      #endif
+    #else
+      #ifndef MPI_GPU
+        buffer_length = Load_Particles_Density_Boundary_to_Buffer(0, 1, h_send_buffer_x1);
+      #else
+        buffer_length = Load_Particles_Density_Boundary_to_Buffer(0, 1, h_send_buffer_x1_particles);
+        cudaMemcpy(d_send_buffer_x1, h_send_buffer_x1_particles, buffer_length * sizeof(Real), cudaMemcpyHostToDevice);
+      #endif
+    #endif
+      } else if (Particles.TRANSFER_PARTICLES_BOUNDARIES) {
+        Load_and_Send_Particles_X1(ireq_n_particles, ireq_particles_transfer);
         transfer_main_buffer = false;
-        ireq_n_particles ++;
-        ireq_particles_transfer ++;
+        ireq_n_particles++;
+        ireq_particles_transfer++;
       }
-      #endif
+  #endif
 
-      if ( transfer_main_buffer ){
-	#if defined(MPI_GPU)
-        //post non-blocking receive right x communication buffer
+      if (transfer_main_buffer) {
+  #if defined(MPI_GPU)
+        // post non-blocking receive right x communication buffer
         MPI_Irecv(d_recv_buffer_x1, buffer_length, MPI_CHREAL, source[1], 1, world, &recv_request[ireq]);
 
-        //non-blocking send right x communication buffer
-        MPI_Isend(d_send_buffer_x1, buffer_length, MPI_CHREAL, dest[1],   0, world, &send_request[1]);
-        #else
-        //post non-blocking receive right x communication buffer
+        // non-blocking send right x communication buffer
+        MPI_Isend(d_send_buffer_x1, buffer_length, MPI_CHREAL, dest[1], 0, world, &send_request[1]);
+  #else
+        // post non-blocking receive right x communication buffer
         MPI_Irecv(h_recv_buffer_x1, buffer_length, MPI_CHREAL, source[1], 1, world, &recv_request[ireq]);
 
-        //non-blocking send right x communication buffer
-        MPI_Isend(h_send_buffer_x1, buffer_length, MPI_CHREAL, dest[1],   0, world, &send_request[1]);
-        #endif
+        // non-blocking send right x communication buffer
+        MPI_Isend(h_send_buffer_x1, buffer_length, MPI_CHREAL, dest[1], 0, world, &send_request[1]);
+  #endif
 
-        MPI_Request_free(send_request+1);
+        MPI_Request_free(send_request + 1);
 
-        //keep track of how many sends and receives are expected
+        // keep track of how many sends and receives are expected
         ireq++;
       }
     }
-    // Receive the number of particles transfer for X
-    #ifdef PARTICLES
-    if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ) Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK( dir, flags );
-    #endif
-
+  // Receive the number of particles transfer for X
+  #ifdef PARTICLES
+    if (Particles.TRANSFER_PARTICLES_BOUNDARIES) {
+      Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(dir, flags);
+    }
+  #endif
   }
 
   /* y boundaries */
-  if (dir==1) {
-    if(flags[2] == 5)
-    {
+  if (dir == 1) {
+    if (flags[2] == 5) {
       // load left y communication buffer
-      if ( H.TRANSFER_HYDRO_BOUNDARIES )
-        {
+      if (H.TRANSFER_HYDRO_BOUNDARIES) {
         buffer_length = Load_Hydro_DeviceBuffer_Y0(d_send_buffer_y0);
-          #ifndef MPI_GPU
-          cudaMemcpy(h_send_buffer_y0, d_send_buffer_y0, ybsize*sizeof(Real),
-                     cudaMemcpyDeviceToHost);
-          #endif
-        //printf("Y0 len: %d\n", buffer_length);
-        }
-
-      #ifdef GRAVITY
-      if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){
-        #ifdef GRAVITY_GPU
-        buffer_length = Load_Gravity_Potential_To_Buffer_GPU( 1, 0, d_send_buffer_y0, 0 );
-          #ifndef MPI_GPU
-          cudaMemcpy(h_send_buffer_y0, d_send_buffer_y0, ybsize*sizeof(Real),
-                     cudaMemcpyDeviceToHost);
-          #endif
-        #else
-        buffer_length = Load_Gravity_Potential_To_Buffer( 1, 0, h_send_buffer_y0, 0 );
-        #endif
+  #ifndef MPI_GPU
+        cudaMemcpy(h_send_buffer_y0, d_send_buffer_y0, ybsize * sizeof(Real), cudaMemcpyDeviceToHost);
+  #endif
+        // printf("Y0 len: %d\n", buffer_length);
       }
-      #ifdef SOR
-      if ( Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES )  buffer_length = Load_Poisson_Boundary_To_Buffer( 1, 0, h_send_buffer_y0 );
-      #endif //SOR
-      #endif //GRAVITY
-
-      #ifdef PARTICLES
-      if ( Particles.TRANSFER_DENSITY_BOUNDARIES) {
-        #ifdef PARTICLES_GPU
-        buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( 1, 0, d_send_buffer_y0  );
-          #ifndef MPI_GPU
-          cudaMemcpy(h_send_buffer_y0, d_send_buffer_y0, ybsize*sizeof(Real),
-                     cudaMemcpyDeviceToHost);
-          #endif
-        #else
-          #ifndef MPI_GPU 
-          buffer_length = Load_Particles_Density_Boundary_to_Buffer( 1, 0, h_send_buffer_y0  );
-          #else
-          buffer_length = Load_Particles_Density_Boundary_to_Buffer( 1, 0, h_send_buffer_y0_particles  );
-          cudaMemcpy(d_send_buffer_y0, h_send_buffer_y0_particles, buffer_length*sizeof(Real), cudaMemcpyHostToDevice);
-          #endif
-        #endif
+
+  #ifdef GRAVITY
+      if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) {
+    #ifdef GRAVITY_GPU
+        buffer_length = Load_Gravity_Potential_To_Buffer_GPU(1, 0, d_send_buffer_y0, 0);
+      #ifndef MPI_GPU
+        cudaMemcpy(h_send_buffer_y0, d_send_buffer_y0, ybsize * sizeof(Real), cudaMemcpyDeviceToHost);
+      #endif
+    #else
+        buffer_length = Load_Gravity_Potential_To_Buffer(1, 0, h_send_buffer_y0, 0);
+    #endif
       }
-      else if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){
-        Load_and_Send_Particles_Y0( ireq_n_particles, ireq_particles_transfer );
+    #ifdef SOR
+      if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES)
+        buffer_length = Load_Poisson_Boundary_To_Buffer(1, 0, h_send_buffer_y0);
+    #endif  // SOR
+  #endif    // GRAVITY
+
+  #ifdef PARTICLES
+      if (Particles.TRANSFER_DENSITY_BOUNDARIES) {
+    #ifdef PARTICLES_GPU
+        buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU(1, 0, d_send_buffer_y0);
+      #ifndef MPI_GPU
+        cudaMemcpy(h_send_buffer_y0, d_send_buffer_y0, ybsize * sizeof(Real), cudaMemcpyDeviceToHost);
+      #endif
+    #else
+      #ifndef MPI_GPU
+        buffer_length = Load_Particles_Density_Boundary_to_Buffer(1, 0, h_send_buffer_y0);
+      #else
+        buffer_length = Load_Particles_Density_Boundary_to_Buffer(1, 0, h_send_buffer_y0_particles);
+        cudaMemcpy(d_send_buffer_y0, h_send_buffer_y0_particles, buffer_length * sizeof(Real), cudaMemcpyHostToDevice);
+      #endif
+    #endif
+      } else if (Particles.TRANSFER_PARTICLES_BOUNDARIES) {
+        Load_and_Send_Particles_Y0(ireq_n_particles, ireq_particles_transfer);
         transfer_main_buffer = false;
-        ireq_n_particles ++;
-        ireq_particles_transfer ++;
+        ireq_n_particles++;
+        ireq_particles_transfer++;
       }
-      #endif
+  #endif
 
-      if ( transfer_main_buffer ){
-	#if defined(MPI_GPU)
-        //post non-blocking receive left y communication buffer
+      if (transfer_main_buffer) {
+  #if defined(MPI_GPU)
+        // post non-blocking receive left y communication buffer
         MPI_Irecv(d_recv_buffer_y0, buffer_length, MPI_CHREAL, source[2], 2, world, &recv_request[ireq]);
 
-        //non-blocking send left y communication buffer
-        MPI_Isend(d_send_buffer_y0, buffer_length, MPI_CHREAL, dest[2],   3, world, &send_request[0]);
-        #else
-        //post non-blocking receive left y communication buffer
+        // non-blocking send left y communication buffer
+        MPI_Isend(d_send_buffer_y0, buffer_length, MPI_CHREAL, dest[2], 3, world, &send_request[0]);
+  #else
+        // post non-blocking receive left y communication buffer
         MPI_Irecv(h_recv_buffer_y0, buffer_length, MPI_CHREAL, source[2], 2, world, &recv_request[ireq]);
 
-        //non-blocking send left y communication buffer
-        MPI_Isend(h_send_buffer_y0, buffer_length, MPI_CHREAL, dest[2],   3, world, &send_request[0]);
-        #endif
+        // non-blocking send left y communication buffer
+        MPI_Isend(h_send_buffer_y0, buffer_length, MPI_CHREAL, dest[2], 3, world, &send_request[0]);
+  #endif
 
         MPI_Request_free(send_request);
 
-        //keep track of how many sends and receives are expected
+        // keep track of how many sends and receives are expected
         ireq++;
       }
     }
 
-    if(flags[3]==5)
-    {
+    if (flags[3] == 5) {
       // load right y communication buffer
-      if ( H.TRANSFER_HYDRO_BOUNDARIES )
-        {
+      if (H.TRANSFER_HYDRO_BOUNDARIES) {
         buffer_length = Load_Hydro_DeviceBuffer_Y1(d_send_buffer_y1);
-          #ifndef MPI_GPU
-          cudaMemcpy(h_send_buffer_y1, d_send_buffer_y1, ybsize*sizeof(Real),
-                     cudaMemcpyDeviceToHost);
-          #endif
-        //printf("Y1 len: %d\n", buffer_length);
-        }
-
-
-      #ifdef GRAVITY
-      if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){
-        #ifdef GRAVITY_GPU
-        buffer_length = Load_Gravity_Potential_To_Buffer_GPU( 1, 1, d_send_buffer_y1, 0 );
-          #ifndef MPI_GPU
-          cudaMemcpy(h_send_buffer_y1, d_send_buffer_y1, ybsize*sizeof(Real),
-                     cudaMemcpyDeviceToHost);
-          #endif
-        #else
-        buffer_length = Load_Gravity_Potential_To_Buffer( 1, 1, h_send_buffer_y1, 0 );
-        #endif
+  #ifndef MPI_GPU
+        cudaMemcpy(h_send_buffer_y1, d_send_buffer_y1, ybsize * sizeof(Real), cudaMemcpyDeviceToHost);
+  #endif
+        // printf("Y1 len: %d\n", buffer_length);
       }
-      #ifdef SOR
-      if ( Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES )  buffer_length = Load_Poisson_Boundary_To_Buffer( 1, 1, h_send_buffer_y1 );
-      #endif //SOR
-      #endif //GRAVITY
-
-      #ifdef PARTICLES
-      if ( Particles.TRANSFER_DENSITY_BOUNDARIES) {
-        #ifdef PARTICLES_GPU
-        buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( 1, 1, d_send_buffer_y1  );
-          #ifndef MPI_GPU
-          cudaMemcpy(h_send_buffer_y1, d_send_buffer_y1, ybsize*sizeof(Real),
-                     cudaMemcpyDeviceToHost);
-          #endif
-        #else
-          #ifndef MPI_GPU 
-          buffer_length = Load_Particles_Density_Boundary_to_Buffer( 1, 1, h_send_buffer_y1  );
-          #else
-          buffer_length = Load_Particles_Density_Boundary_to_Buffer( 1, 1, h_send_buffer_y1_particles  );
-          cudaMemcpy(d_send_buffer_y1, h_send_buffer_y1_particles, buffer_length*sizeof(Real), cudaMemcpyHostToDevice);
-          #endif
-        #endif
+
+  #ifdef GRAVITY
+      if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) {
+    #ifdef GRAVITY_GPU
+        buffer_length = Load_Gravity_Potential_To_Buffer_GPU(1, 1, d_send_buffer_y1, 0);
+      #ifndef MPI_GPU
+        cudaMemcpy(h_send_buffer_y1, d_send_buffer_y1, ybsize * sizeof(Real), cudaMemcpyDeviceToHost);
+      #endif
+    #else
+        buffer_length = Load_Gravity_Potential_To_Buffer(1, 1, h_send_buffer_y1, 0);
+    #endif
       }
-      else if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){
-        Load_and_Send_Particles_Y1( ireq_n_particles, ireq_particles_transfer );
+    #ifdef SOR
+      if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES)
+        buffer_length = Load_Poisson_Boundary_To_Buffer(1, 1, h_send_buffer_y1);
+    #endif  // SOR
+  #endif    // GRAVITY
+
+  #ifdef PARTICLES
+      if (Particles.TRANSFER_DENSITY_BOUNDARIES) {
+    #ifdef PARTICLES_GPU
+        buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU(1, 1, d_send_buffer_y1);
+      #ifndef MPI_GPU
+        cudaMemcpy(h_send_buffer_y1, d_send_buffer_y1, ybsize * sizeof(Real), cudaMemcpyDeviceToHost);
+      #endif
+    #else
+      #ifndef MPI_GPU
+        buffer_length = Load_Particles_Density_Boundary_to_Buffer(1, 1, h_send_buffer_y1);
+      #else
+        buffer_length = Load_Particles_Density_Boundary_to_Buffer(1, 1, h_send_buffer_y1_particles);
+        cudaMemcpy(d_send_buffer_y1, h_send_buffer_y1_particles, buffer_length * sizeof(Real), cudaMemcpyHostToDevice);
+      #endif
+    #endif
+      } else if (Particles.TRANSFER_PARTICLES_BOUNDARIES) {
+        Load_and_Send_Particles_Y1(ireq_n_particles, ireq_particles_transfer);
         transfer_main_buffer = false;
-        ireq_n_particles ++;
-        ireq_particles_transfer ++;
+        ireq_n_particles++;
+        ireq_particles_transfer++;
       }
-      #endif
+  #endif
 
-      if ( transfer_main_buffer ){
-        #if defined(MPI_GPU)
-        //post non-blocking receive right y communication buffer
+      if (transfer_main_buffer) {
+  #if defined(MPI_GPU)
+        // post non-blocking receive right y communication buffer
         MPI_Irecv(d_recv_buffer_y1, buffer_length, MPI_CHREAL, source[3], 3, world, &recv_request[ireq]);
 
-        //non-blocking send right y communication buffer
-        MPI_Isend(d_send_buffer_y1, buffer_length, MPI_CHREAL, dest[3],   2, world, &send_request[1]);
-        #else
-        //post non-blocking receive right y communication buffer
+        // non-blocking send right y communication buffer
+        MPI_Isend(d_send_buffer_y1, buffer_length, MPI_CHREAL, dest[3], 2, world, &send_request[1]);
+  #else
+        // post non-blocking receive right y communication buffer
         MPI_Irecv(h_recv_buffer_y1, buffer_length, MPI_CHREAL, source[3], 3, world, &recv_request[ireq]);
 
-        //non-blocking send right y communication buffer
-        MPI_Isend(h_send_buffer_y1, buffer_length, MPI_CHREAL, dest[3],   2, world, &send_request[1]);
-        #endif
-        MPI_Request_free(send_request+1);
+        // non-blocking send right y communication buffer
+        MPI_Isend(h_send_buffer_y1, buffer_length, MPI_CHREAL, dest[3], 2, world, &send_request[1]);
+  #endif
+        MPI_Request_free(send_request + 1);
 
-        //keep track of how many sends and receives are expected
+        // keep track of how many sends and receives are expected
         ireq++;
       }
     }
-    // Receive the number of particles transfer for Y
-    #ifdef PARTICLES
-    if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ) Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK( dir, flags );
-    #endif
-
+  // Receive the number of particles transfer for Y
+  #ifdef PARTICLES
+    if (Particles.TRANSFER_PARTICLES_BOUNDARIES) {
+      Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(dir, flags);
+    }
+  #endif
   }
 
   /* z boundaries */
-  if (dir==2) {
-
-    if(flags[4]==5)
-    {
+  if (dir == 2) {
+    if (flags[4] == 5) {
       // left z communication buffer
-      if ( H.TRANSFER_HYDRO_BOUNDARIES )
-        {
+      if (H.TRANSFER_HYDRO_BOUNDARIES) {
         buffer_length = Load_Hydro_DeviceBuffer_Z0(d_send_buffer_z0);
-          #ifndef MPI_GPU
-          cudaMemcpy(h_send_buffer_z0, d_send_buffer_z0, zbsize*sizeof(Real),
-                     cudaMemcpyDeviceToHost);
-          #endif
-        //printf("Z0 len: %d\n", buffer_length);
-        }
-
-      #ifdef GRAVITY
-      if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){
-        #ifdef GRAVITY_GPU
-        buffer_length = Load_Gravity_Potential_To_Buffer_GPU( 2, 0, d_send_buffer_z0, 0 );
-          #ifndef MPI_GPU
-          cudaMemcpy(h_send_buffer_z0, d_send_buffer_z0, zbsize*sizeof(Real),
-                     cudaMemcpyDeviceToHost);
-          #endif
-        #else
-        buffer_length = Load_Gravity_Potential_To_Buffer( 2, 0, h_send_buffer_z0, 0 );
-        #endif
+  #ifndef MPI_GPU
+        cudaMemcpy(h_send_buffer_z0, d_send_buffer_z0, zbsize * sizeof(Real), cudaMemcpyDeviceToHost);
+  #endif
+        // printf("Z0 len: %d\n", buffer_length);
       }
-      #ifdef SOR
-      if ( Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES )  buffer_length = Load_Poisson_Boundary_To_Buffer( 2, 0, h_send_buffer_z0 );
-      #endif //SOR
-      #endif //GRAVITY
-
-      #ifdef PARTICLES
-      if ( Particles.TRANSFER_DENSITY_BOUNDARIES) {
-        #ifdef PARTICLES_GPU
-        buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( 2, 0, d_send_buffer_z0  );
-          #ifndef MPI_GPU
-          cudaMemcpy(h_send_buffer_z0, d_send_buffer_z0, zbsize*sizeof(Real),
-                     cudaMemcpyDeviceToHost);
-          #endif
-        #else
-          #ifndef MPI_GPU 
-          buffer_length = Load_Particles_Density_Boundary_to_Buffer( 2, 0, h_send_buffer_z0  );
-          #else
-          buffer_length = Load_Particles_Density_Boundary_to_Buffer( 2, 0, h_send_buffer_z0_particles  );
-          cudaMemcpy(d_send_buffer_z0, h_send_buffer_z0_particles, buffer_length*sizeof(Real), cudaMemcpyHostToDevice);
-          #endif
-        #endif
+
+  #ifdef GRAVITY
+      if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) {
+    #ifdef GRAVITY_GPU
+        buffer_length = Load_Gravity_Potential_To_Buffer_GPU(2, 0, d_send_buffer_z0, 0);
+      #ifndef MPI_GPU
+        cudaMemcpy(h_send_buffer_z0, d_send_buffer_z0, zbsize * sizeof(Real), cudaMemcpyDeviceToHost);
+      #endif
+    #else
+        buffer_length = Load_Gravity_Potential_To_Buffer(2, 0, h_send_buffer_z0, 0);
+    #endif
       }
-      else if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){
-        Load_and_Send_Particles_Z0( ireq_n_particles, ireq_particles_transfer );
+    #ifdef SOR
+      if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES)
+        buffer_length = Load_Poisson_Boundary_To_Buffer(2, 0, h_send_buffer_z0);
+    #endif  // SOR
+  #endif    // GRAVITY
+
+  #ifdef PARTICLES
+      if (Particles.TRANSFER_DENSITY_BOUNDARIES) {
+    #ifdef PARTICLES_GPU
+        buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU(2, 0, d_send_buffer_z0);
+      #ifndef MPI_GPU
+        cudaMemcpy(h_send_buffer_z0, d_send_buffer_z0, zbsize * sizeof(Real), cudaMemcpyDeviceToHost);
+      #endif
+    #else
+      #ifndef MPI_GPU
+        buffer_length = Load_Particles_Density_Boundary_to_Buffer(2, 0, h_send_buffer_z0);
+      #else
+        buffer_length = Load_Particles_Density_Boundary_to_Buffer(2, 0, h_send_buffer_z0_particles);
+        cudaMemcpy(d_send_buffer_z0, h_send_buffer_z0_particles, buffer_length * sizeof(Real), cudaMemcpyHostToDevice);
+      #endif
+    #endif
+      } else if (Particles.TRANSFER_PARTICLES_BOUNDARIES) {
+        Load_and_Send_Particles_Z0(ireq_n_particles, ireq_particles_transfer);
         transfer_main_buffer = false;
-        ireq_n_particles ++;
-        ireq_particles_transfer ++;
+        ireq_n_particles++;
+        ireq_particles_transfer++;
       }
-      #endif
+  #endif
 
-      if ( transfer_main_buffer ){
-        #if defined(MPI_GPU)
-        //post non-blocking receive left z communication buffer
+      if (transfer_main_buffer) {
+  #if defined(MPI_GPU)
+        // post non-blocking receive left z communication buffer
         MPI_Irecv(d_recv_buffer_z0, buffer_length, MPI_CHREAL, source[4], 4, world, &recv_request[ireq]);
-        //non-blocking send left z communication buffer
-        MPI_Isend(d_send_buffer_z0, buffer_length, MPI_CHREAL, dest[4],   5, world, &send_request[0]);
-        #else
-        //post non-blocking receive left z communication buffer
+        // non-blocking send left z communication buffer
+        MPI_Isend(d_send_buffer_z0, buffer_length, MPI_CHREAL, dest[4], 5, world, &send_request[0]);
+  #else
+        // post non-blocking receive left z communication buffer
         MPI_Irecv(h_recv_buffer_z0, buffer_length, MPI_CHREAL, source[4], 4, world, &recv_request[ireq]);
 
-        //non-blocking send left z communication buffer
-        MPI_Isend(h_send_buffer_z0, buffer_length, MPI_CHREAL, dest[4],   5, world, &send_request[0]);
-        #endif
+        // non-blocking send left z communication buffer
+        MPI_Isend(h_send_buffer_z0, buffer_length, MPI_CHREAL, dest[4], 5, world, &send_request[0]);
+  #endif
 
         MPI_Request_free(send_request);
 
-        //keep track of how many sends and receives are expected
+        // keep track of how many sends and receives are expected
         ireq++;
       }
     }
 
-    if(flags[5]==5)
-    {
+    if (flags[5] == 5) {
       // load right z communication buffer
-      if ( H.TRANSFER_HYDRO_BOUNDARIES )
-        {
+      if (H.TRANSFER_HYDRO_BOUNDARIES) {
         buffer_length = Load_Hydro_DeviceBuffer_Z1(d_send_buffer_z1);
-          #ifndef MPI_GPU
-          cudaMemcpy(h_send_buffer_z1, d_send_buffer_z1, zbsize*sizeof(Real),
-                     cudaMemcpyDeviceToHost);
-          #endif
-        //printf("Z1 len: %d\n", buffer_length);
-        }
-
-      #ifdef GRAVITY
-      if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){
-        #ifdef GRAVITY_GPU
-        buffer_length = Load_Gravity_Potential_To_Buffer_GPU( 2, 1, d_send_buffer_z1, 0 );
-          #ifndef MPI_GPU
-          cudaMemcpy(h_send_buffer_z1, d_send_buffer_z1, zbsize*sizeof(Real),
-                     cudaMemcpyDeviceToHost);
-          #endif
-        #else
-        buffer_length = Load_Gravity_Potential_To_Buffer( 2, 1, h_send_buffer_z1, 0 );
-        #endif
+  #ifndef MPI_GPU
+        cudaMemcpy(h_send_buffer_z1, d_send_buffer_z1, zbsize * sizeof(Real), cudaMemcpyDeviceToHost);
+  #endif
+        // printf("Z1 len: %d\n", buffer_length);
       }
-      #ifdef SOR
-      if ( Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES )  buffer_length = Load_Poisson_Boundary_To_Buffer( 2, 1, h_send_buffer_z1 );
-      #endif //SOR
-      #endif //GRAVITY
-
-      #ifdef PARTICLES
-      if ( Particles.TRANSFER_DENSITY_BOUNDARIES) {
-        #ifdef PARTICLES_GPU
-        buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( 2, 1, d_send_buffer_z1  );
-          #ifndef MPI_GPU
-          cudaMemcpy(h_send_buffer_z1, d_send_buffer_z1, zbsize*sizeof(Real),
-                     cudaMemcpyDeviceToHost);
-          #endif
-        #else
-          #ifndef MPI_GPU 
-          buffer_length = Load_Particles_Density_Boundary_to_Buffer( 2, 1, h_send_buffer_z1  );
-          #else
-          buffer_length = Load_Particles_Density_Boundary_to_Buffer( 2, 1, h_send_buffer_z1_particles  );
-          cudaMemcpy(d_send_buffer_z1, h_send_buffer_z1_particles, buffer_length*sizeof(Real), cudaMemcpyHostToDevice);
-          #endif
-        #endif
+
+  #ifdef GRAVITY
+      if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) {
+    #ifdef GRAVITY_GPU
+        buffer_length = Load_Gravity_Potential_To_Buffer_GPU(2, 1, d_send_buffer_z1, 0);
+      #ifndef MPI_GPU
+        cudaMemcpy(h_send_buffer_z1, d_send_buffer_z1, zbsize * sizeof(Real), cudaMemcpyDeviceToHost);
+      #endif
+    #else
+        buffer_length = Load_Gravity_Potential_To_Buffer(2, 1, h_send_buffer_z1, 0);
+    #endif
       }
-      else if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){
-        Load_and_Send_Particles_Z1( ireq_n_particles, ireq_particles_transfer );
+    #ifdef SOR
+      if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES)
+        buffer_length = Load_Poisson_Boundary_To_Buffer(2, 1, h_send_buffer_z1);
+    #endif  // SOR
+  #endif    // GRAVITY
+
+  #ifdef PARTICLES
+      if (Particles.TRANSFER_DENSITY_BOUNDARIES) {
+    #ifdef PARTICLES_GPU
+        buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU(2, 1, d_send_buffer_z1);
+      #ifndef MPI_GPU
+        cudaMemcpy(h_send_buffer_z1, d_send_buffer_z1, zbsize * sizeof(Real), cudaMemcpyDeviceToHost);
+      #endif
+    #else
+      #ifndef MPI_GPU
+        buffer_length = Load_Particles_Density_Boundary_to_Buffer(2, 1, h_send_buffer_z1);
+      #else
+        buffer_length = Load_Particles_Density_Boundary_to_Buffer(2, 1, h_send_buffer_z1_particles);
+        cudaMemcpy(d_send_buffer_z1, h_send_buffer_z1_particles, buffer_length * sizeof(Real), cudaMemcpyHostToDevice);
+      #endif
+    #endif
+      } else if (Particles.TRANSFER_PARTICLES_BOUNDARIES) {
+        Load_and_Send_Particles_Z1(ireq_n_particles, ireq_particles_transfer);
         transfer_main_buffer = false;
-        ireq_n_particles ++;
-        ireq_particles_transfer ++;
+        ireq_n_particles++;
+        ireq_particles_transfer++;
       }
-      #endif
+  #endif
 
-      if ( transfer_main_buffer ){
-        #if defined(MPI_GPU)
-        //post non-blocking receive right x communication buffer
+      if (transfer_main_buffer) {
+  #if defined(MPI_GPU)
+        // post non-blocking receive right x communication buffer
         MPI_Irecv(d_recv_buffer_z1, buffer_length, MPI_CHREAL, source[5], 5, world, &recv_request[ireq]);
 
-        //non-blocking send right x communication buffer
-        MPI_Isend(d_send_buffer_z1, buffer_length, MPI_CHREAL, dest[5],   4, world, &send_request[1]);
-        #else
-        //post non-blocking receive right x communication buffer
+        // non-blocking send right x communication buffer
+        MPI_Isend(d_send_buffer_z1, buffer_length, MPI_CHREAL, dest[5], 4, world, &send_request[1]);
+  #else
+        // post non-blocking receive right x communication buffer
         MPI_Irecv(h_recv_buffer_z1, buffer_length, MPI_CHREAL, source[5], 5, world, &recv_request[ireq]);
 
-        //non-blocking send right x communication buffer
-        MPI_Isend(h_send_buffer_z1, buffer_length, MPI_CHREAL, dest[5],   4, world, &send_request[1]);
-        #endif
-        MPI_Request_free(send_request+1);
+        // non-blocking send right x communication buffer
+        MPI_Isend(h_send_buffer_z1, buffer_length, MPI_CHREAL, dest[5], 4, world, &send_request[1]);
+  #endif
+        MPI_Request_free(send_request + 1);
 
-        //keep track of how many sends and receives are expected
+        // keep track of how many sends and receives are expected
         ireq++;
       }
     }
     // Receive the number of particles transfer for Z
-      #ifdef PARTICLES
-      if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ) Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK( dir, flags );
-      #endif
+  #ifdef PARTICLES
+    if (Particles.TRANSFER_PARTICLES_BOUNDARIES) {
+      Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(dir, flags);
+    }
+  #endif
   }
-
 }
 
 void Grid3D::Wait_and_Unload_MPI_Comm_Buffers(int dir, int *flags)
 {
-
   #ifdef PARTICLES
-  // If we are transfering the particles buffers we dont need to unload the main buffers
-  if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ) return;
+  // If we are transfering the particles buffers we dont need to unload the main
+  // buffers
+  if (Particles.TRANSFER_PARTICLES_BOUNDARIES) {
+    return;
+  }
   #endif
 
   int iwait;
-  int index = 0;
-  int wait_max=0;
+  int index    = 0;
+  int wait_max = 0;
   MPI_Status status;
 
-  //find out how many recvs we need to wait for
-  if (dir==0) {
-    if(flags[0] == 5) //there is communication on this face
-      wait_max++;   //so we'll need to wait for its comm
-    if(flags[1] == 5) //there is communication on this face
-      wait_max++;   //so we'll need to wait for its comm
-  }
-  if (dir==1) {
-    if(flags[2] == 5) //there is communication on this face
-      wait_max++;   //so we'll need to wait for its comm
-    if(flags[3] == 5) //there is communication on this face
-      wait_max++;   //so we'll need to wait for its comm
-  }
-  if (dir==2) {
-    if(flags[4] == 5) //there is communication on this face
-      wait_max++;   //so we'll need to wait for its comm
-    if(flags[5] == 5) //there is communication on this face
-      wait_max++;   //so we'll need to wait for its comm
-  }
-
-  //wait for any receives to complete
-  for(iwait=0;iwait<wait_max;iwait++)
-  {
-    //wait for recv completion
-    MPI_Waitany(wait_max,recv_request,&index,&status);
-    //if (procID==1) MPI_Get_count(&status, MPI_CHREAL, &count);
-    //if (procID==1) printf("Process 1 unloading direction %d, source %d, index %d, length %d.\n", status.MPI_TAG, status.MPI_SOURCE, index, count);
-    //depending on which face arrived, load the buffer into the ghost grid
+  // find out how many recvs we need to wait for
+  if (dir == 0) {
+    if (flags[0] == 5) {  // there is communication on this face
+      wait_max++;         // so we'll need to wait for its comm
+    }
+    if (flags[1] == 5) {  // there is communication on this face
+      wait_max++;         // so we'll need to wait for its comm
+    }
+  }
+  if (dir == 1) {
+    if (flags[2] == 5) {  // there is communication on this face
+      wait_max++;         // so we'll need to wait for its comm
+    }
+    if (flags[3] == 5) {  // there is communication on this face
+      wait_max++;         // so we'll need to wait for its comm
+    }
+  }
+  if (dir == 2) {
+    if (flags[4] == 5) {  // there is communication on this face
+      wait_max++;         // so we'll need to wait for its comm
+    }
+    if (flags[5] == 5) {  // there is communication on this face
+      wait_max++;         // so we'll need to wait for its comm
+    }
+  }
+
+  // wait for any receives to complete
+  for (iwait = 0; iwait < wait_max; iwait++) {
+    // wait for recv completion
+    MPI_Waitany(wait_max, recv_request, &index, &status);
+    // if (procID==1) MPI_Get_count(&status, MPI_CHREAL, &count);
+    // if (procID==1) printf("Process 1 unloading direction %d, source %d, index
+    // %d, length %d.\n", status.MPI_TAG, status.MPI_SOURCE, index, count);
+    // depending on which face arrived, load the buffer into the ghost grid
     Unload_MPI_Comm_Buffers(status.MPI_TAG);
   }
 }
 
 void Grid3D::Unload_MPI_Comm_Buffers(int index)
 {
-
   // local recv buffers
-  Real *l_recv_buffer_x0, *l_recv_buffer_x1, *l_recv_buffer_y0,
-       *l_recv_buffer_y1, *l_recv_buffer_z0, *l_recv_buffer_z1;
+  Real *l_recv_buffer_x0, *l_recv_buffer_x1, *l_recv_buffer_y0, *l_recv_buffer_y1, *l_recv_buffer_z0, *l_recv_buffer_z1;
 
-  Grid3D_PMF_UnloadHydroBuffer Fptr_Unload_Hydro_Buffer_X0,
-                               Fptr_Unload_Hydro_Buffer_X1,
-                               Fptr_Unload_Hydro_Buffer_Y0,
-                               Fptr_Unload_Hydro_Buffer_Y1,
-                               Fptr_Unload_Hydro_Buffer_Z0,
-                               Fptr_Unload_Hydro_Buffer_Z1;
+  Grid3D_PMF_UnloadHydroBuffer Fptr_Unload_Hydro_Buffer_X0, Fptr_Unload_Hydro_Buffer_X1, Fptr_Unload_Hydro_Buffer_Y0,
+      Fptr_Unload_Hydro_Buffer_Y1, Fptr_Unload_Hydro_Buffer_Z0, Fptr_Unload_Hydro_Buffer_Z1;
 
   Grid3D_PMF_UnloadGravityPotential Fptr_Unload_Gravity_Potential;
   Grid3D_PMF_UnloadParticleDensity Fptr_Unload_Particle_Density;
 
-  if ( H.TRANSFER_HYDRO_BOUNDARIES ) {
-    #ifndef MPI_GPU
-    copyHostToDeviceReceiveBuffer ( index );
-    #endif
+  if (H.TRANSFER_HYDRO_BOUNDARIES) {
+  #ifndef MPI_GPU
+    copyHostToDeviceReceiveBuffer(index);
+  #endif
     l_recv_buffer_x0 = d_recv_buffer_x0;
     l_recv_buffer_x1 = d_recv_buffer_x1;
     l_recv_buffer_y0 = d_recv_buffer_y0;
@@ -891,23 +858,34 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index)
     Fptr_Unload_Hydro_Buffer_Z0 = &Grid3D::Unload_Hydro_DeviceBuffer_Z0;
     Fptr_Unload_Hydro_Buffer_Z1 = &Grid3D::Unload_Hydro_DeviceBuffer_Z1;
 
-
-    switch ( index ) {
-    case ( 0 ): (this->*Fptr_Unload_Hydro_Buffer_X0) ( l_recv_buffer_x0 ); break;
-    case ( 1 ): (this->*Fptr_Unload_Hydro_Buffer_X1) ( l_recv_buffer_x1 ); break;
-    case ( 2 ): (this->*Fptr_Unload_Hydro_Buffer_Y0) ( l_recv_buffer_y0 ); break;
-    case ( 3 ): (this->*Fptr_Unload_Hydro_Buffer_Y1) ( l_recv_buffer_y1 ); break;
-    case ( 4 ): (this->*Fptr_Unload_Hydro_Buffer_Z0) ( l_recv_buffer_z0 ); break;
-    case ( 5 ): (this->*Fptr_Unload_Hydro_Buffer_Z1) ( l_recv_buffer_z1 ); break;
+    switch (index) {
+      case (0):
+        (this->*Fptr_Unload_Hydro_Buffer_X0)(l_recv_buffer_x0);
+        break;
+      case (1):
+        (this->*Fptr_Unload_Hydro_Buffer_X1)(l_recv_buffer_x1);
+        break;
+      case (2):
+        (this->*Fptr_Unload_Hydro_Buffer_Y0)(l_recv_buffer_y0);
+        break;
+      case (3):
+        (this->*Fptr_Unload_Hydro_Buffer_Y1)(l_recv_buffer_y1);
+        break;
+      case (4):
+        (this->*Fptr_Unload_Hydro_Buffer_Z0)(l_recv_buffer_z0);
+        break;
+      case (5):
+        (this->*Fptr_Unload_Hydro_Buffer_Z1)(l_recv_buffer_z1);
+        break;
     }
   }
 
   #ifdef GRAVITY
-  if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){
+  if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) {
     #ifdef GRAVITY_GPU
-     #ifndef MPI_GPU
-     copyHostToDeviceReceiveBuffer ( index );
-     #endif // MPI_GPU
+      #ifndef MPI_GPU
+    copyHostToDeviceReceiveBuffer(index);
+      #endif  // MPI_GPU
 
     l_recv_buffer_x0 = d_recv_buffer_x0;
     l_recv_buffer_x1 = d_recv_buffer_x1;
@@ -916,8 +894,7 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index)
     l_recv_buffer_z0 = d_recv_buffer_z0;
     l_recv_buffer_z1 = d_recv_buffer_z1;
 
-    Fptr_Unload_Gravity_Potential
-      = &Grid3D::Unload_Gravity_Potential_from_Buffer_GPU;
+    Fptr_Unload_Gravity_Potential = &Grid3D::Unload_Gravity_Potential_from_Buffer_GPU;
 
     #else
 
@@ -928,21 +905,32 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index)
     l_recv_buffer_z0 = h_recv_buffer_z0;
     l_recv_buffer_z1 = h_recv_buffer_z1;
 
-    Fptr_Unload_Gravity_Potential
-      = &Grid3D::Unload_Gravity_Potential_from_Buffer;
+    Fptr_Unload_Gravity_Potential = &Grid3D::Unload_Gravity_Potential_from_Buffer;
 
-    #endif // GRAVITY_GPU
+    #endif  // GRAVITY_GPU
 
-    if ( index == 0 ) (this->*Fptr_Unload_Gravity_Potential)( 0, 0, l_recv_buffer_x0, 0  );
-    if ( index == 1 ) (this->*Fptr_Unload_Gravity_Potential)( 0, 1, l_recv_buffer_x1, 0  );
-    if ( index == 2 ) (this->*Fptr_Unload_Gravity_Potential)( 1, 0, l_recv_buffer_y0, 0  );
-    if ( index == 3 ) (this->*Fptr_Unload_Gravity_Potential)( 1, 1, l_recv_buffer_y1, 0  );
-    if ( index == 4 ) (this->*Fptr_Unload_Gravity_Potential)( 2, 0, l_recv_buffer_z0, 0  );
-    if ( index == 5 ) (this->*Fptr_Unload_Gravity_Potential)( 2, 1, l_recv_buffer_z1, 0  );
+    if (index == 0) {
+      (this->*Fptr_Unload_Gravity_Potential)(0, 0, l_recv_buffer_x0, 0);
+    }
+    if (index == 1) {
+      (this->*Fptr_Unload_Gravity_Potential)(0, 1, l_recv_buffer_x1, 0);
+    }
+    if (index == 2) {
+      (this->*Fptr_Unload_Gravity_Potential)(1, 0, l_recv_buffer_y0, 0);
+    }
+    if (index == 3) {
+      (this->*Fptr_Unload_Gravity_Potential)(1, 1, l_recv_buffer_y1, 0);
+    }
+    if (index == 4) {
+      (this->*Fptr_Unload_Gravity_Potential)(2, 0, l_recv_buffer_z0, 0);
+    }
+    if (index == 5) {
+      (this->*Fptr_Unload_Gravity_Potential)(2, 1, l_recv_buffer_z1, 0);
+    }
   }
 
-  #ifdef SOR
-  if ( Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES ){
+    #ifdef SOR
+  if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES) {
     l_recv_buffer_x0 = h_recv_buffer_x0;
     l_recv_buffer_x1 = h_recv_buffer_x1;
     l_recv_buffer_y0 = h_recv_buffer_y0;
@@ -950,23 +938,34 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index)
     l_recv_buffer_z0 = h_recv_buffer_z0;
     l_recv_buffer_z1 = h_recv_buffer_z1;
 
-    if ( index == 0 ) Unload_Poisson_Boundary_From_Buffer( 0, 0, l_recv_buffer_x0 );
-    if ( index == 1 ) Unload_Poisson_Boundary_From_Buffer( 0, 1, l_recv_buffer_x1 );
-    if ( index == 2 ) Unload_Poisson_Boundary_From_Buffer( 1, 0, l_recv_buffer_y0 );
-    if ( index == 3 ) Unload_Poisson_Boundary_From_Buffer( 1, 1, l_recv_buffer_y1 );
-    if ( index == 4 ) Unload_Poisson_Boundary_From_Buffer( 2, 0, l_recv_buffer_z0 );
-    if ( index == 5 ) Unload_Poisson_Boundary_From_Buffer( 2, 1, l_recv_buffer_z1 );
+    if (index == 0) {
+      Unload_Poisson_Boundary_From_Buffer(0, 0, l_recv_buffer_x0);
+    }
+    if (index == 1) {
+      Unload_Poisson_Boundary_From_Buffer(0, 1, l_recv_buffer_x1);
+    }
+    if (index == 2) {
+      Unload_Poisson_Boundary_From_Buffer(1, 0, l_recv_buffer_y0);
+    }
+    if (index == 3) {
+      Unload_Poisson_Boundary_From_Buffer(1, 1, l_recv_buffer_y1);
+    }
+    if (index == 4) {
+      Unload_Poisson_Boundary_From_Buffer(2, 0, l_recv_buffer_z0);
+    }
+    if (index == 5) {
+      Unload_Poisson_Boundary_From_Buffer(2, 1, l_recv_buffer_z1);
+    }
   }
-  #endif //SOR
-
-  #endif  //GRAVITY
+    #endif  // SOR
 
+  #endif  // GRAVITY
 
   #ifdef PARTICLES
-  if (  Particles.TRANSFER_DENSITY_BOUNDARIES ){
+  if (Particles.TRANSFER_DENSITY_BOUNDARIES) {
     #ifdef PARTICLES_GPU
       #ifndef MPI_GPU
-      copyHostToDeviceReceiveBuffer ( index );
+    copyHostToDeviceReceiveBuffer(index);
       #endif
 
     l_recv_buffer_x0 = d_recv_buffer_x0;
@@ -976,48 +975,69 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index)
     l_recv_buffer_z0 = d_recv_buffer_z0;
     l_recv_buffer_z1 = d_recv_buffer_z1;
 
-    Fptr_Unload_Particle_Density
-      = &Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU;
+    Fptr_Unload_Particle_Density = &Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU;
 
     #else
-    
-    #ifdef MPI_GPU 
-    if ( index == 0 ) Copy_Particles_Density_Buffer_Device_to_Host( 0, 0, d_recv_buffer_x0, h_recv_buffer_x0_particles );
-    if ( index == 1 ) Copy_Particles_Density_Buffer_Device_to_Host( 0, 1, d_recv_buffer_x1, h_recv_buffer_x1_particles );
-    if ( index == 2 ) Copy_Particles_Density_Buffer_Device_to_Host( 1, 0, d_recv_buffer_y0, h_recv_buffer_y0_particles );
-    if ( index == 3 ) Copy_Particles_Density_Buffer_Device_to_Host( 1, 1, d_recv_buffer_y1, h_recv_buffer_y1_particles );
-    if ( index == 4 ) Copy_Particles_Density_Buffer_Device_to_Host( 2, 0, d_recv_buffer_z0, h_recv_buffer_z0_particles );
-    if ( index == 5 ) Copy_Particles_Density_Buffer_Device_to_Host( 2, 1, d_recv_buffer_z1, h_recv_buffer_z1_particles );
+
+      #ifdef MPI_GPU
+    if (index == 0) {
+      Copy_Particles_Density_Buffer_Device_to_Host(0, 0, d_recv_buffer_x0, h_recv_buffer_x0_particles);
+    }
+    if (index == 1) {
+      Copy_Particles_Density_Buffer_Device_to_Host(0, 1, d_recv_buffer_x1, h_recv_buffer_x1_particles);
+    }
+    if (index == 2) {
+      Copy_Particles_Density_Buffer_Device_to_Host(1, 0, d_recv_buffer_y0, h_recv_buffer_y0_particles);
+    }
+    if (index == 3) {
+      Copy_Particles_Density_Buffer_Device_to_Host(1, 1, d_recv_buffer_y1, h_recv_buffer_y1_particles);
+    }
+    if (index == 4) {
+      Copy_Particles_Density_Buffer_Device_to_Host(2, 0, d_recv_buffer_z0, h_recv_buffer_z0_particles);
+    }
+    if (index == 5) {
+      Copy_Particles_Density_Buffer_Device_to_Host(2, 1, d_recv_buffer_z1, h_recv_buffer_z1_particles);
+    }
     l_recv_buffer_x0 = h_recv_buffer_x0_particles;
     l_recv_buffer_x1 = h_recv_buffer_x1_particles;
     l_recv_buffer_y0 = h_recv_buffer_y0_particles;
     l_recv_buffer_y1 = h_recv_buffer_y1_particles;
     l_recv_buffer_z0 = h_recv_buffer_z0_particles;
     l_recv_buffer_z1 = h_recv_buffer_z1_particles;
-    #else
+      #else
     l_recv_buffer_x0 = h_recv_buffer_x0;
     l_recv_buffer_x1 = h_recv_buffer_x1;
     l_recv_buffer_y0 = h_recv_buffer_y0;
     l_recv_buffer_y1 = h_recv_buffer_y1;
     l_recv_buffer_z0 = h_recv_buffer_z0;
     l_recv_buffer_z1 = h_recv_buffer_z1;
-    #endif //MPI_GPU
-    
-    Fptr_Unload_Particle_Density
-      = &Grid3D::Unload_Particles_Density_Boundary_From_Buffer;
+      #endif  // MPI_GPU
 
-    #endif // PARTICLES_GPU
+    Fptr_Unload_Particle_Density = &Grid3D::Unload_Particles_Density_Boundary_From_Buffer;
 
-    if ( index == 0 ) (this->*Fptr_Unload_Particle_Density)( 0, 0, l_recv_buffer_x0 );
-    if ( index == 1 ) (this->*Fptr_Unload_Particle_Density)( 0, 1, l_recv_buffer_x1 );
-    if ( index == 2 ) (this->*Fptr_Unload_Particle_Density)( 1, 0, l_recv_buffer_y0 );
-    if ( index == 3 ) (this->*Fptr_Unload_Particle_Density)( 1, 1, l_recv_buffer_y1 );
-    if ( index == 4 ) (this->*Fptr_Unload_Particle_Density)( 2, 0, l_recv_buffer_z0 );
-    if ( index == 5 ) (this->*Fptr_Unload_Particle_Density)( 2, 1, l_recv_buffer_z1 );
-  }
+    #endif  // PARTICLES_GPU
 
-  #endif  //PARTICLES
+    if (index == 0) {
+      (this->*Fptr_Unload_Particle_Density)(0, 0, l_recv_buffer_x0);
+    }
+    if (index == 1) {
+      (this->*Fptr_Unload_Particle_Density)(0, 1, l_recv_buffer_x1);
+    }
+    if (index == 2) {
+      (this->*Fptr_Unload_Particle_Density)(1, 0, l_recv_buffer_y0);
+    }
+    if (index == 3) {
+      (this->*Fptr_Unload_Particle_Density)(1, 1, l_recv_buffer_y1);
+    }
+    if (index == 4) {
+      (this->*Fptr_Unload_Particle_Density)(2, 0, l_recv_buffer_z0);
+    }
+    if (index == 5) {
+      (this->*Fptr_Unload_Particle_Density)(2, 1, l_recv_buffer_z1);
+    }
+  }
 
+  #endif  // PARTICLES
 }
 
 #endif /*MPI_CHOLLA*/
diff --git a/src/h_correction/flux_correction.h b/src/h_correction/flux_correction.h
index 15aac55e3..6b1a2e055 100644
--- a/src/h_correction/flux_correction.h
+++ b/src/h_correction/flux_correction.h
@@ -1,22 +1,30 @@
 /*! \file flux_correction.h
- *  \brief Declarations of functions used in the first-order flux correction method. */
+ *  \brief Declarations of functions used in the first-order flux correction
+ * method. */
 
 #ifndef FLUX_CORRECTION_H
 #define FLUX_CORRECTION_H
 
-void Flux_Correction_3D(Real *C1, Real *C2, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt);
+void Flux_Correction_3D(Real *C1, Real *C2, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost,
+                        Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt);
 
 void fill_flux_array_pcm(Real *C1, int idl, int idr, Real cW[], int n_cells, int dir);
 
-void second_order_fluxes(Real *C1, Real *C2, Real C_i[], Real C_imo[], Real C_imt[], Real C_ipo[], Real C_ipt[], Real C_jmo[], Real C_jmt[], Real C_jpo[], Real C_jpt[], Real C_kmo[], Real C_kmt[], Real C_kpo[], Real C_kpt[], int i, int j, int k, Real dx, Real dy, Real dz, Real dt, int n_fields, int nx, int ny, int nz, int n_cells);
+void second_order_fluxes(Real *C1, Real *C2, Real C_i[], Real C_imo[], Real C_imt[], Real C_ipo[], Real C_ipt[],
+                         Real C_jmo[], Real C_jmt[], Real C_jpo[], Real C_jpt[], Real C_kmo[], Real C_kmt[],
+                         Real C_kpo[], Real C_kpt[], int i, int j, int k, Real dx, Real dy, Real dz, Real dt,
+                         int n_fields, int nx, int ny, int nz, int n_cells);
 
 void average_cell(Real *C1, int i, int j, int k, int nx, int ny, int nz, int n_cells, int n_fields);
 
-void first_order_fluxes(Real *C1, Real *C2, int i, int j, int k, Real dtodx, Real dtody, Real dtodz, int nfields, int nx, int ny, int nz, int n_cells);
+void first_order_fluxes(Real *C1, Real *C2, int i, int j, int k, Real dtodx, Real dtody, Real dtodz, int nfields,
+                        int nx, int ny, int nz, int n_cells);
 
-void first_order_update(Real *C1, Real *C_half, int i, int j, int k, Real dtodx, Real dtody, Real dtodz, int nfields, int nx, int ny, int nz, int n_cells);
+void first_order_update(Real *C1, Real *C_half, int i, int j, int k, Real dtodx, Real dtody, Real dtodz, int nfields,
+                        int nx, int ny, int nz, int n_cells);
 
-void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real *gx, Real *gy, Real *gz);
+void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz,
+               Real xbound, Real ybound, Real zbound, Real *gx, Real *gy, Real *gz);
 
 void cooling_CPU(Real *C2, int id, int n_cells, Real dt);
 
@@ -24,4 +32,4 @@ Real Schure_cool_CPU(Real n, Real T);
 
 Real Wiersma_cool_CPU(Real n, Real T);
 
-#endif //FLUX_CORRECTION_H
+#endif  // FLUX_CORRECTION_H
diff --git a/src/h_correction/h_correction_2D_cuda.cu b/src/h_correction/h_correction_2D_cuda.cu
index f04f7816e..d4e65d7cc 100644
--- a/src/h_correction/h_correction_2D_cuda.cu
+++ b/src/h_correction/h_correction_2D_cuda.cu
@@ -1,172 +1,168 @@
 /*! \file h_correction_2D_cuda.cu
  *  \brief Functions definitions for the H correction kernels.
            Written following Sanders et al. 1998. */
-#ifdef CUDA
-#ifdef H_CORRECTION
 
-#include "../utils/gpu.hpp"
-#include <math.h>
-#include "../global/global.h"
-#include "../global/global_cuda.h"
-#include "../h_correction/h_correction_2D_cuda.h"
+#ifdef H_CORRECTION
 
+  #include <math.h>
 
+  #include "../global/global.h"
+  #include "../global/global_cuda.h"
+  #include "../h_correction/h_correction_2D_cuda.h"
+  #include "../utils/gpu.hpp"
 
-/*! \fn void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int n_ghost, Real gamma)
- *  \brief When passed the left and right boundary values at an interface, calculates
-           the eta value for the interface according to the forumulation in Sanders et al, 1998. */
-__global__ void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int n_ghost, Real gamma)
+/*! \fn void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x,
+ int nx, int ny, int n_ghost, Real gamma)
+ *  \brief When passed the left and right boundary values at an interface,
+ calculates the eta value for the interface according to the forumulation in
+ Sanders et al, 1998. */
+__global__ void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int n_ghost,
+                              Real gamma)
 {
-  int n_cells = nx*ny;
+  int n_cells = nx * ny;
 
   // declare primitive variables for each stencil
   // these will be placed into registers for each thread
   Real pl, pr, al, ar;
 
   // get a thread ID
-  int blockId = blockIdx.x + blockIdx.y*gridDim.x;
-  int tid = threadIdx.x + blockId * blockDim.x;
+  int blockId = blockIdx.x + blockIdx.y * gridDim.x;
+  int tid     = threadIdx.x + blockId * blockDim.x;
   int id;
-  int zid = tid / (nx*ny);
-  int yid = (tid - zid*nx*ny) / nx;
-  int xid = tid - zid*nx*ny - yid*nx;
-
+  int zid = tid / (nx * ny);
+  int yid = (tid - zid * nx * ny) / nx;
+  int xid = tid - zid * nx * ny - yid * nx;
 
-  if (xid > n_ghost-2 && xid < nx-n_ghost && yid > n_ghost-2 && yid < ny-n_ghost+1)
-  {
+  if (xid > n_ghost - 2 && xid < nx - n_ghost && yid > n_ghost - 2 && yid < ny - n_ghost + 1) {
     // load the interface values into registers
-    id = xid + yid*nx;
-    pl  = (dev_bounds_L[4*n_cells + id] -
-      0.5*(dev_bounds_L[  n_cells+id]*dev_bounds_L[  n_cells+id] +
-           dev_bounds_L[2*n_cells+id]*dev_bounds_L[2*n_cells+id] +
-           dev_bounds_L[3*n_cells+id]*dev_bounds_L[3*n_cells+id])/dev_bounds_L[id]) * (gamma - 1.0);
-    pl  = fmax(pl, (Real) 1.0e-20);
-    pr  = (dev_bounds_R[4*n_cells + id] -
-      0.5*(dev_bounds_R[  n_cells+id]*dev_bounds_R[  n_cells+id] +
-           dev_bounds_R[2*n_cells+id]*dev_bounds_R[2*n_cells+id] +
-           dev_bounds_R[3*n_cells+id]*dev_bounds_R[3*n_cells+id])/dev_bounds_R[id]) * (gamma - 1.0);
-    pr  = fmax(pr, (Real) 1.0e-20);
-
-    al = sqrt(gamma*pl/dev_bounds_L[id]);
-    ar = sqrt(gamma*pl/dev_bounds_R[id]);
-
-    eta_x[id] = 0.5*fabs((dev_bounds_R[n_cells+id]/dev_bounds_R[id] + ar) - (dev_bounds_L[n_cells+id]/dev_bounds_L[id] - al));
-
+    id = xid + yid * nx;
+    pl = (dev_bounds_L[4 * n_cells + id] - 0.5 *
+                                               (dev_bounds_L[n_cells + id] * dev_bounds_L[n_cells + id] +
+                                                dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id] +
+                                                dev_bounds_L[3 * n_cells + id] * dev_bounds_L[3 * n_cells + id]) /
+                                               dev_bounds_L[id]) *
+         (gamma - 1.0);
+    pl = fmax(pl, (Real)1.0e-20);
+    pr = (dev_bounds_R[4 * n_cells + id] - 0.5 *
+                                               (dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id] +
+                                                dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id] +
+                                                dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id]) /
+                                               dev_bounds_R[id]) *
+         (gamma - 1.0);
+    pr = fmax(pr, (Real)1.0e-20);
+
+    al = sqrt(gamma * pl / dev_bounds_L[id]);
+    ar = sqrt(gamma * pl / dev_bounds_R[id]);
+
+    eta_x[id] = 0.5 * fabs((dev_bounds_R[n_cells + id] / dev_bounds_R[id] + ar) -
+                           (dev_bounds_L[n_cells + id] / dev_bounds_L[id] - al));
   }
-
 }
 
-
-
-/*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int n_ghost, Real gamma)
- *  \brief When passed the left and right boundary values at an interface, calculates
-           the eta value for the interface according to the forumulation in Sanders et al, 1998. */
-__global__ void calc_eta_y_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int n_ghost, Real gamma)
+/*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int
+ nx, int ny, int n_ghost, Real gamma)
+ *  \brief When passed the left and right boundary values at an interface,
+ calculates the eta value for the interface according to the forumulation in
+ Sanders et al, 1998. */
+__global__ void calc_eta_y_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int n_ghost,
+                              Real gamma)
 {
-  int n_cells = nx*ny;
+  int n_cells = nx * ny;
 
   // declare primitive variables for each stencil
   // these will be placed into registers for each thread
   Real pl, pr, al, ar;
 
   // get a thread ID
-  int blockId = blockIdx.x + blockIdx.y*gridDim.x;
-  int tid = threadIdx.x + blockId*blockDim.x;
+  int blockId = blockIdx.x + blockIdx.y * gridDim.x;
+  int tid     = threadIdx.x + blockId * blockDim.x;
   int id;
-  int zid = tid / (nx*ny);
-  int yid = (tid - zid*nx*ny) / nx;
-  int xid = tid - zid*nx*ny - yid*nx;
-
+  int zid = tid / (nx * ny);
+  int yid = (tid - zid * nx * ny) / nx;
+  int xid = tid - zid * nx * ny - yid * nx;
 
-  if (yid > n_ghost-2 && yid < ny-n_ghost && xid > n_ghost-2 && xid < nx-n_ghost+1)
-  {
+  if (yid > n_ghost - 2 && yid < ny - n_ghost && xid > n_ghost - 2 && xid < nx - n_ghost + 1) {
     // load the interface values into registers
-    id = xid + yid*nx;
-    pl  = (dev_bounds_L[4*n_cells + id] -
-      0.5*(dev_bounds_L[2*n_cells+id]*dev_bounds_L[2*n_cells+id] +
-           dev_bounds_L[3*n_cells+id]*dev_bounds_L[3*n_cells+id] +
-           dev_bounds_L[  n_cells+id]*dev_bounds_L[  n_cells+id])/dev_bounds_L[id]) * (gamma - 1.0);
-    pl  = fmax(pl, (Real) 1.0e-20);
-    pr  = (dev_bounds_R[4*n_cells + id] -
-      0.5*(dev_bounds_R[2*n_cells+id]*dev_bounds_R[2*n_cells+id] +
-           dev_bounds_R[3*n_cells+id]*dev_bounds_R[3*n_cells+id] +
-           dev_bounds_R[  n_cells+id]*dev_bounds_R[  n_cells+id])/dev_bounds_R[id]) * (gamma - 1.0);
-    pr  = fmax(pr, (Real) 1.0e-20);
-
-    al = sqrt(gamma*pl/dev_bounds_L[id]);
-    ar = sqrt(gamma*pl/dev_bounds_R[id]);
-
-    eta_y[id] = 0.5*fabs((dev_bounds_R[2*n_cells+id]/dev_bounds_R[id] + ar) - (dev_bounds_L[2*n_cells+id]/dev_bounds_L[id] - al));
-
+    id = xid + yid * nx;
+    pl = (dev_bounds_L[4 * n_cells + id] - 0.5 *
+                                               (dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id] +
+                                                dev_bounds_L[3 * n_cells + id] * dev_bounds_L[3 * n_cells + id] +
+                                                dev_bounds_L[n_cells + id] * dev_bounds_L[n_cells + id]) /
+                                               dev_bounds_L[id]) *
+         (gamma - 1.0);
+    pl = fmax(pl, (Real)1.0e-20);
+    pr = (dev_bounds_R[4 * n_cells + id] - 0.5 *
+                                               (dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id] +
+                                                dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id] +
+                                                dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id]) /
+                                               dev_bounds_R[id]) *
+         (gamma - 1.0);
+    pr = fmax(pr, (Real)1.0e-20);
+
+    al = sqrt(gamma * pl / dev_bounds_L[id]);
+    ar = sqrt(gamma * pl / dev_bounds_R[id]);
+
+    eta_y[id] = 0.5 * fabs((dev_bounds_R[2 * n_cells + id] / dev_bounds_R[id] + ar) -
+                           (dev_bounds_L[2 * n_cells + id] / dev_bounds_L[id] - al));
   }
-
 }
 
-
-
-/*! \fn void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, int ny, int n_ghost)
+/*! \fn void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, int
+ ny, int n_ghost)
  *  \brief When passed the eta values at every interface, calculates
-           the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */
+           the eta_h value for the interface according to the forumulation in
+ Sanders et al, 1998. */
 __global__ void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, int ny, int n_ghost)
 {
-
   // get a thread ID
-  int blockId = blockIdx.x + blockIdx.y*gridDim.x;
-  int tid = threadIdx.x + blockId*blockDim.x;
+  int blockId = blockIdx.x + blockIdx.y * gridDim.x;
+  int tid     = threadIdx.x + blockId * blockDim.x;
   int id;
-  int zid = tid / (nx*ny);
-  int yid = (tid - zid*nx*ny) / nx;
-  int xid = tid - zid*nx*ny - yid*nx;
+  int zid = tid / (nx * ny);
+  int yid = (tid - zid * nx * ny) / nx;
+  int xid = tid - zid * nx * ny - yid * nx;
 
   Real etah;
 
-  if (xid > n_ghost-2 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost)
-  {
-      id = xid + yid*nx;
+  if (xid > n_ghost - 2 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost) {
+    id = xid + yid * nx;
 
-      etah = fmax(eta_y[xid + (yid-1)*nx], eta_y[id]);
-      etah = fmax(etah, eta_x[id]);
-      etah = fmax(etah, eta_y[xid+1 + (yid-1)*nx]);
-      etah = fmax(etah, eta_y[xid+1 + yid*nx]);
+    etah = fmax(eta_y[xid + (yid - 1) * nx], eta_y[id]);
+    etah = fmax(etah, eta_x[id]);
+    etah = fmax(etah, eta_y[xid + 1 + (yid - 1) * nx]);
+    etah = fmax(etah, eta_y[xid + 1 + yid * nx]);
 
-      etah_x[id] = etah;
+    etah_x[id] = etah;
   }
-
 }
 
-
-/*! \fn void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, int ny, int n_ghost)
+/*! \fn void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, int
+ ny, int n_ghost)
  *  \brief When passed the eta values at every interface, calculates
-           the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */
+           the eta_h value for the interface according to the forumulation in
+ Sanders et al, 1998. */
 __global__ void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, int ny, int n_ghost)
 {
-
   // get a thread ID
-  int blockId = blockIdx.x + blockIdx.y*gridDim.x;
-  int tid = threadIdx.x + blockId*blockDim.x;
+  int blockId = blockIdx.x + blockIdx.y * gridDim.x;
+  int tid     = threadIdx.x + blockId * blockDim.x;
   int id;
-  int zid = tid / (nx*ny);
-  int yid = (tid - zid*nx*ny) / nx;
-  int xid = tid - zid*nx*ny - yid*nx;
+  int zid = tid / (nx * ny);
+  int yid = (tid - zid * nx * ny) / nx;
+  int xid = tid - zid * nx * ny - yid * nx;
 
   Real etah;
 
-  if (yid > n_ghost-2 && yid < ny-n_ghost && xid > n_ghost-1 && xid < nx-n_ghost)
-  {
-      id = xid + yid*nx;
+  if (yid > n_ghost - 2 && yid < ny - n_ghost && xid > n_ghost - 1 && xid < nx - n_ghost) {
+    id = xid + yid * nx;
 
-      etah = fmax(eta_x[xid-1 + yid*nx], eta_x[id]);
-      etah = fmax(etah, eta_y[id]);
-      etah = fmax(etah, eta_x[xid-1 + (yid+1)*nx]);
-      etah = fmax(etah, eta_x[xid + (yid+1)*nx]);
+    etah = fmax(eta_x[xid - 1 + yid * nx], eta_x[id]);
+    etah = fmax(etah, eta_y[id]);
+    etah = fmax(etah, eta_x[xid - 1 + (yid + 1) * nx]);
+    etah = fmax(etah, eta_x[xid + (yid + 1) * nx]);
 
-      etah_y[id] = etah;
+    etah_y[id] = etah;
   }
-
 }
 
-
-
-
-#endif //H_CORRECTION
-#endif //CUDA
+#endif  // H_CORRECTION
diff --git a/src/h_correction/h_correction_2D_cuda.h b/src/h_correction/h_correction_2D_cuda.h
index daa11e39a..9d824cf42 100644
--- a/src/h_correction/h_correction_2D_cuda.h
+++ b/src/h_correction/h_correction_2D_cuda.h
@@ -1,42 +1,46 @@
 /*! \file h_correction_2D_cuda.h
  *  \brief Functions declarations for the H correction kernels.
            Written following Sanders et al. 1998. */
-#ifdef CUDA
-#ifdef H_CORRECTION
-#ifndef H_CORRECTION_2D_H
-#define H_CORRECTION_2D_H
-
-#include "../utils/gpu.hpp"
-#include <math.h>
-#include "../global/global.h"
-#include "../global/global_cuda.h"
-
-
-
-/*! \fn void calc_eta_x(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int nz, int n_ghost, Real gamma)
- *  \brief When passed the left and right boundary values at an interface, calculates
-           the eta value for the interface according to the forumulation in Sanders et al, 1998. */
-__global__ void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int n_ghost, Real gamma);
-
 
-/*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int nz, int n_ghost, Real gamma)
- *  \brief When passed the left and right boundary values at an interface, calculates
-           the eta value for the interface according to the forumulation in Sanders et al, 1998. */
-__global__ void calc_eta_y_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int n_ghost, Real gamma);
-
-
-/*! \fn void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, int ny, int n_ghost)
+#ifdef H_CORRECTION
+  #ifndef H_CORRECTION_2D_H
+    #define H_CORRECTION_2D_H
+
+    #include <math.h>
+
+    #include "../global/global.h"
+    #include "../global/global_cuda.h"
+    #include "../utils/gpu.hpp"
+
+/*! \fn void calc_eta_x(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int
+ nx, int ny, int nz, int n_ghost, Real gamma)
+ *  \brief When passed the left and right boundary values at an interface,
+ calculates the eta value for the interface according to the forumulation in
+ Sanders et al, 1998. */
+__global__ void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int n_ghost,
+                              Real gamma);
+
+/*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int
+ nx, int ny, int nz, int n_ghost, Real gamma)
+ *  \brief When passed the left and right boundary values at an interface,
+ calculates the eta value for the interface according to the forumulation in
+ Sanders et al, 1998. */
+__global__ void calc_eta_y_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int n_ghost,
+                              Real gamma);
+
+/*! \fn void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, int
+ ny, int n_ghost)
  *  \brief When passed the eta values at every interface, calculates
-           the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */
+           the eta_h value for the interface according to the forumulation in
+ Sanders et al, 1998. */
 __global__ void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, int ny, int n_ghost);
 
-
-/*! \fn void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, int ny, int n_ghost)
+/*! \fn void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, int
+ ny, int n_ghost)
  *  \brief When passed the eta values at every interface, calculates
-           the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */
+           the eta_h value for the interface according to the forumulation in
+ Sanders et al, 1998. */
 __global__ void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, int ny, int n_ghost);
 
-
-#endif //H_CORRECTION_2D_H
-#endif //H_CORRECTION
-#endif //CUDA
+  #endif  // H_CORRECTION_2D_H
+#endif    // H_CORRECTION
diff --git a/src/h_correction/h_correction_3D_cuda.cu b/src/h_correction/h_correction_3D_cuda.cu
index e58632eaf..b3609b529 100644
--- a/src/h_correction/h_correction_3D_cuda.cu
+++ b/src/h_correction/h_correction_3D_cuda.cu
@@ -1,263 +1,265 @@
 /*! \file h_correction_3D_cuda.cu
  *  \brief Functions definitions for the H correction kernels.
            Written following Sanders et al. 1998. */
-#ifdef CUDA
 
-#include "../utils/gpu.hpp"
 #include <math.h>
+
 #include "../global/global.h"
 #include "../global/global_cuda.h"
 #include "../h_correction/h_correction_3D_cuda.h"
+#include "../utils/gpu.hpp"
 
-
-
-/*! \fn void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int nz, int n_ghost, Real gamma)
- *  \brief When passed the left and right boundary values at an interface, calculates
-           the eta value for the interface according to the forumulation in Sanders et al, 1998. */
-__global__ void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int nz, int n_ghost, Real gamma)
+/*! \fn void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x,
+ int nx, int ny, int nz, int n_ghost, Real gamma)
+ *  \brief When passed the left and right boundary values at an interface,
+ calculates the eta value for the interface according to the forumulation in
+ Sanders et al, 1998. */
+__global__ void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int nz, int n_ghost,
+                              Real gamma)
 {
-  int n_cells = nx*ny*nz;
+  int n_cells = nx * ny * nz;
 
   // declare primitive variables for each stencil
   // these will be placed into registers for each thread
   Real pl, pr, al, ar;
 
   // get a thread ID
-  int tid = threadIdx.x + blockIdx.x*blockDim.x;
+  int tid = threadIdx.x + blockIdx.x * blockDim.x;
   int id;
-  int zid = tid / (nx*ny);
-  int yid = (tid - zid*nx*ny) / nx;
-  int xid = tid - zid*nx*ny - yid*nx;
+  int zid = tid / (nx * ny);
+  int yid = (tid - zid * nx * ny) / nx;
+  int xid = tid - zid * nx * ny - yid * nx;
 
   // x-direction
-  if (xid > n_ghost-2 && xid < nx-n_ghost && yid > n_ghost-2 && yid < ny-n_ghost+1 && zid > n_ghost-2 && zid < nz-n_ghost+1)
-  {
+  if (xid > n_ghost - 2 && xid < nx - n_ghost && yid > n_ghost - 2 && yid < ny - n_ghost + 1 && zid > n_ghost - 2 &&
+      zid < nz - n_ghost + 1) {
     // load the interface values into registers
-    id = xid + yid*nx + zid*nx*ny;
-    pl  = (dev_bounds_L[4*n_cells + id] -
-      0.5*(dev_bounds_L[  n_cells+id]*dev_bounds_L[  n_cells+id] +
-           dev_bounds_L[2*n_cells+id]*dev_bounds_L[2*n_cells+id] +
-           dev_bounds_L[3*n_cells+id]*dev_bounds_L[3*n_cells+id])/dev_bounds_L[id]) * (gamma - 1.0);
-    pl  = fmax(pl, (Real) 1.0e-20);
-    pr  = (dev_bounds_R[4*n_cells + id] -
-      0.5*(dev_bounds_R[  n_cells+id]*dev_bounds_R[  n_cells+id] +
-           dev_bounds_R[2*n_cells+id]*dev_bounds_R[2*n_cells+id] +
-           dev_bounds_R[3*n_cells+id]*dev_bounds_R[3*n_cells+id])/dev_bounds_R[id]) * (gamma - 1.0);
-    pr  = fmax(pr, (Real) 1.0e-20);
-
-    al = sqrt(gamma*pl/dev_bounds_L[id]);
-    ar = sqrt(gamma*pl/dev_bounds_R[id]);
-
-    eta_x[id] = 0.5*fabs((dev_bounds_R[n_cells+id]/dev_bounds_R[id] + ar) - (dev_bounds_L[n_cells+id]/dev_bounds_L[id] - al));
-
+    id = xid + yid * nx + zid * nx * ny;
+    pl = (dev_bounds_L[4 * n_cells + id] - 0.5 *
+                                               (dev_bounds_L[n_cells + id] * dev_bounds_L[n_cells + id] +
+                                                dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id] +
+                                                dev_bounds_L[3 * n_cells + id] * dev_bounds_L[3 * n_cells + id]) /
+                                               dev_bounds_L[id]) *
+         (gamma - 1.0);
+    pl = fmax(pl, (Real)1.0e-20);
+    pr = (dev_bounds_R[4 * n_cells + id] - 0.5 *
+                                               (dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id] +
+                                                dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id] +
+                                                dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id]) /
+                                               dev_bounds_R[id]) *
+         (gamma - 1.0);
+    pr = fmax(pr, (Real)1.0e-20);
+
+    al = sqrt(gamma * pl / dev_bounds_L[id]);
+    ar = sqrt(gamma * pl / dev_bounds_R[id]);
+
+    eta_x[id] = 0.5 * fabs((dev_bounds_R[n_cells + id] / dev_bounds_R[id] + ar) -
+                           (dev_bounds_L[n_cells + id] / dev_bounds_L[id] - al));
   }
-
 }
 
-
-
-/*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int nz, int n_ghost, Real gamma)
- *  \brief When passed the left and right boundary values at an interface, calculates
-           the eta value for the interface according to the forumulation in Sanders et al, 1998. */
-__global__ void calc_eta_y_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int nz, int n_ghost, Real gamma)
+/*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int
+ nx, int ny, int nz, int n_ghost, Real gamma)
+ *  \brief When passed the left and right boundary values at an interface,
+ calculates the eta value for the interface according to the forumulation in
+ Sanders et al, 1998. */
+__global__ void calc_eta_y_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int nz, int n_ghost,
+                              Real gamma)
 {
-  int n_cells = nx*ny*nz;
+  int n_cells = nx * ny * nz;
 
   // declare primitive variables for each stencil
   // these will be placed into registers for each thread
   Real pl, pr, al, ar;
 
   // get a thread ID
-  int tid = threadIdx.x + blockIdx.x*blockDim.x;
+  int tid = threadIdx.x + blockIdx.x * blockDim.x;
   int id;
-  int zid = tid / (nx*ny);
-  int yid = (tid - zid*nx*ny) / nx;
-  int xid = tid - zid*nx*ny - yid*nx;
+  int zid = tid / (nx * ny);
+  int yid = (tid - zid * nx * ny) / nx;
+  int xid = tid - zid * nx * ny - yid * nx;
 
   // y-direction
-  if (yid > n_ghost-2 && yid < ny-n_ghost && xid > n_ghost-2 && xid < nx-n_ghost+1 && zid > n_ghost-2 && zid < nz-n_ghost+1)
-  {
+  if (yid > n_ghost - 2 && yid < ny - n_ghost && xid > n_ghost - 2 && xid < nx - n_ghost + 1 && zid > n_ghost - 2 &&
+      zid < nz - n_ghost + 1) {
     // load the interface values into registers
-    id = xid + yid*nx + zid*nx*ny;
-    pl  = (dev_bounds_L[4*n_cells + id] -
-      0.5*(dev_bounds_L[2*n_cells+id]*dev_bounds_L[2*n_cells+id] +
-           dev_bounds_L[3*n_cells+id]*dev_bounds_L[3*n_cells+id] +
-           dev_bounds_L[  n_cells+id]*dev_bounds_L[  n_cells+id])/dev_bounds_L[id]) * (gamma - 1.0);
-    pl  = fmax(pl, (Real) 1.0e-20);
-    pr  = (dev_bounds_R[4*n_cells + id] -
-      0.5*(dev_bounds_R[2*n_cells+id]*dev_bounds_R[2*n_cells+id] +
-           dev_bounds_R[3*n_cells+id]*dev_bounds_R[3*n_cells+id] +
-           dev_bounds_R[  n_cells+id]*dev_bounds_R[  n_cells+id])/dev_bounds_R[id]) * (gamma - 1.0);
-    pr  = fmax(pr, (Real) 1.0e-20);
-
-    al = sqrt(gamma*pl/dev_bounds_L[id]);
-    ar = sqrt(gamma*pl/dev_bounds_R[id]);
-
-    eta_y[id] = 0.5*fabs((dev_bounds_R[2*n_cells+id]/dev_bounds_R[id] + ar) - (dev_bounds_L[2*n_cells+id]/dev_bounds_L[id] - al));
-
+    id = xid + yid * nx + zid * nx * ny;
+    pl = (dev_bounds_L[4 * n_cells + id] - 0.5 *
+                                               (dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id] +
+                                                dev_bounds_L[3 * n_cells + id] * dev_bounds_L[3 * n_cells + id] +
+                                                dev_bounds_L[n_cells + id] * dev_bounds_L[n_cells + id]) /
+                                               dev_bounds_L[id]) *
+         (gamma - 1.0);
+    pl = fmax(pl, (Real)1.0e-20);
+    pr = (dev_bounds_R[4 * n_cells + id] - 0.5 *
+                                               (dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id] +
+                                                dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id] +
+                                                dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id]) /
+                                               dev_bounds_R[id]) *
+         (gamma - 1.0);
+    pr = fmax(pr, (Real)1.0e-20);
+
+    al = sqrt(gamma * pl / dev_bounds_L[id]);
+    ar = sqrt(gamma * pl / dev_bounds_R[id]);
+
+    eta_y[id] = 0.5 * fabs((dev_bounds_R[2 * n_cells + id] / dev_bounds_R[id] + ar) -
+                           (dev_bounds_L[2 * n_cells + id] / dev_bounds_L[id] - al));
   }
-
 }
 
-
-/*! \fn void calc_eta_z(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int nx, int ny, int nz, int n_ghost, Real gamma)
- *  \brief When passed the left and right boundary values at an interface, calculates
-           the eta value for the interface according to the forumulation in Sanders et al, 1998. */
-__global__ void calc_eta_z_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int nx, int ny, int nz, int n_ghost, Real gamma)
+/*! \fn void calc_eta_z(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int
+ nx, int ny, int nz, int n_ghost, Real gamma)
+ *  \brief When passed the left and right boundary values at an interface,
+ calculates the eta value for the interface according to the forumulation in
+ Sanders et al, 1998. */
+__global__ void calc_eta_z_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int nx, int ny, int nz, int n_ghost,
+                              Real gamma)
 {
-  int n_cells = nx*ny*nz;
+  int n_cells = nx * ny * nz;
 
   // declare primitive variables for each stencil
   // these will be placed into registers for each thread
   Real pl, pr, al, ar;
 
   // get a thread ID
-  int tid = threadIdx.x + blockIdx.x*blockDim.x;
+  int tid = threadIdx.x + blockIdx.x * blockDim.x;
   int id;
-  int zid = tid / (nx*ny);
-  int yid = (tid - zid*nx*ny) / nx;
-  int xid = tid - zid*nx*ny - yid*nx;
+  int zid = tid / (nx * ny);
+  int yid = (tid - zid * nx * ny) / nx;
+  int xid = tid - zid * nx * ny - yid * nx;
 
   // z-direction
-  if (zid > n_ghost-2 && zid < nz-n_ghost && xid > n_ghost-2 && xid < nx-n_ghost+1 && yid > n_ghost-2 && yid < ny-n_ghost+1)
-  {
+  if (zid > n_ghost - 2 && zid < nz - n_ghost && xid > n_ghost - 2 && xid < nx - n_ghost + 1 && yid > n_ghost - 2 &&
+      yid < ny - n_ghost + 1) {
     // load the interface values into registers
-    id = xid + yid*nx + zid*nx*ny;
-    pl  = (dev_bounds_L[4*n_cells + id] -
-      0.5*(dev_bounds_L[3*n_cells+id]*dev_bounds_L[3*n_cells+id] +
-           dev_bounds_L[  n_cells+id]*dev_bounds_L[  n_cells+id] +
-           dev_bounds_L[2*n_cells+id]*dev_bounds_L[2*n_cells+id])/dev_bounds_L[id]) * (gamma - 1.0);
-    pl  = fmax(pl, (Real) 1.0e-20);
-    pr  = (dev_bounds_R[4*n_cells + id] -
-      0.5*(dev_bounds_R[3*n_cells+id]*dev_bounds_R[3*n_cells+id] +
-           dev_bounds_R[  n_cells+id]*dev_bounds_R[  n_cells+id] +
-           dev_bounds_R[2*n_cells+id]*dev_bounds_R[2*n_cells+id])/dev_bounds_R[id]) * (gamma - 1.0);
-    pr  = fmax(pr, (Real) 1.0e-20);
-
-    al = sqrt(gamma*pl/dev_bounds_L[id]);
-    ar = sqrt(gamma*pl/dev_bounds_R[id]);
-
-    eta_z[id] = 0.5*fabs((dev_bounds_R[3*n_cells+id]/dev_bounds_R[id] + ar) - (dev_bounds_L[3*n_cells+id]/dev_bounds_L[id] - al));
-
+    id = xid + yid * nx + zid * nx * ny;
+    pl = (dev_bounds_L[4 * n_cells + id] - 0.5 *
+                                               (dev_bounds_L[3 * n_cells + id] * dev_bounds_L[3 * n_cells + id] +
+                                                dev_bounds_L[n_cells + id] * dev_bounds_L[n_cells + id] +
+                                                dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id]) /
+                                               dev_bounds_L[id]) *
+         (gamma - 1.0);
+    pl = fmax(pl, (Real)1.0e-20);
+    pr = (dev_bounds_R[4 * n_cells + id] - 0.5 *
+                                               (dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id] +
+                                                dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id] +
+                                                dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id]) /
+                                               dev_bounds_R[id]) *
+         (gamma - 1.0);
+    pr = fmax(pr, (Real)1.0e-20);
+
+    al = sqrt(gamma * pl / dev_bounds_L[id]);
+    ar = sqrt(gamma * pl / dev_bounds_R[id]);
+
+    eta_z[id] = 0.5 * fabs((dev_bounds_R[3 * n_cells + id] / dev_bounds_R[id] + ar) -
+                           (dev_bounds_L[3 * n_cells + id] / dev_bounds_L[id] - al));
   }
-
 }
 
-
-
-/*! \fn void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_x, int nx, int ny, int nz, int n_ghost)
+/*! \fn void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_x,
+ int nx, int ny, int nz, int n_ghost)
  *  \brief When passed the eta values at every interface, calculates
-           the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */
+           the eta_h value for the interface according to the forumulation in
+ Sanders et al, 1998. */
 __global__ void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_x, int nx, int ny, int nz, int n_ghost)
 {
-
   // get a thread ID
-  int tid = threadIdx.x + blockIdx.x*blockDim.x;
+  int tid = threadIdx.x + blockIdx.x * blockDim.x;
   int id;
-  int zid = tid / (nx*ny);
-  int yid = (tid - zid*nx*ny) / nx;
-  int xid = tid - zid*nx*ny - yid*nx;
+  int zid = tid / (nx * ny);
+  int yid = (tid - zid * nx * ny) / nx;
+  int xid = tid - zid * nx * ny - yid * nx;
 
   Real etah;
 
   // x-direction
-  if (xid > n_ghost-2 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost)
-  {
-    id = xid + yid*nx + zid*nx*ny;
+  if (xid > n_ghost - 2 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 &&
+      zid < nz - n_ghost) {
+    id = xid + yid * nx + zid * nx * ny;
 
-    etah = fmax(eta_y[xid + (yid-1)*nx + zid*nx*ny], eta_y[xid+1 + (yid-1)*nx + zid*nx*ny]);
+    etah = fmax(eta_y[xid + (yid - 1) * nx + zid * nx * ny], eta_y[xid + 1 + (yid - 1) * nx + zid * nx * ny]);
     etah = fmax(etah, eta_y[id]);
-    etah = fmax(etah, eta_y[xid+1 + yid*nx + zid*nx*ny]);
+    etah = fmax(etah, eta_y[xid + 1 + yid * nx + zid * nx * ny]);
 
-    etah = fmax(etah, eta_z[xid + yid*nx + (zid-1)*nx*ny]);
-    etah = fmax(etah, eta_z[xid+1 + yid*nx + (zid-1)*nx*ny]);
+    etah = fmax(etah, eta_z[xid + yid * nx + (zid - 1) * nx * ny]);
+    etah = fmax(etah, eta_z[xid + 1 + yid * nx + (zid - 1) * nx * ny]);
     etah = fmax(etah, eta_z[id]);
-    etah = fmax(etah, eta_z[xid+1 + yid*nx + zid*nx*ny]);
+    etah = fmax(etah, eta_z[xid + 1 + yid * nx + zid * nx * ny]);
 
     etah = fmax(etah, eta_x[id]);
 
     etah_x[id] = etah;
-
   }
-
 }
 
-
-/*! \fn void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_y, int nx, int ny, int nz, int n_ghost)
+/*! \fn void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_y,
+ int nx, int ny, int nz, int n_ghost)
  *  \brief When passed the eta values at every interface, calculates
-           the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */
+           the eta_h value for the interface according to the forumulation in
+ Sanders et al, 1998. */
 __global__ void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_y, int nx, int ny, int nz, int n_ghost)
 {
-
   // get a thread ID
-  int tid = threadIdx.x + blockIdx.x*blockDim.x;
+  int tid = threadIdx.x + blockIdx.x * blockDim.x;
   int id;
-  int zid = tid / (nx*ny);
-  int yid = (tid - zid*nx*ny) / nx;
-  int xid = tid - zid*nx*ny - yid*nx;
+  int zid = tid / (nx * ny);
+  int yid = (tid - zid * nx * ny) / nx;
+  int xid = tid - zid * nx * ny - yid * nx;
 
   Real etah;
 
   // y-direction
-  if (yid > n_ghost-2 && yid < ny-n_ghost && xid > n_ghost-1 && xid < nx-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost)
-  {
-    id = xid + yid*nx + zid*nx*ny;
+  if (yid > n_ghost - 2 && yid < ny - n_ghost && xid > n_ghost - 1 && xid < nx - n_ghost && zid > n_ghost - 1 &&
+      zid < nz - n_ghost) {
+    id = xid + yid * nx + zid * nx * ny;
 
-    etah = fmax(eta_z[xid + yid*nx + (zid-1)*nx*ny], eta_z[xid + (yid+1)*nx + (zid-1)*nx*ny]);
+    etah = fmax(eta_z[xid + yid * nx + (zid - 1) * nx * ny], eta_z[xid + (yid + 1) * nx + (zid - 1) * nx * ny]);
     etah = fmax(etah, eta_z[id]);
-    etah = fmax(etah, eta_z[xid + (yid+1)*nx + zid*nx*ny]);
+    etah = fmax(etah, eta_z[xid + (yid + 1) * nx + zid * nx * ny]);
 
-    etah = fmax(etah, eta_x[xid-1 + yid*nx + zid*nx*ny]);
-    etah = fmax(etah, eta_x[xid-1 + (yid+1)*nx + zid*nx*ny]);
+    etah = fmax(etah, eta_x[xid - 1 + yid * nx + zid * nx * ny]);
+    etah = fmax(etah, eta_x[xid - 1 + (yid + 1) * nx + zid * nx * ny]);
     etah = fmax(etah, eta_x[id]);
-    etah = fmax(etah, eta_x[xid + (yid+1)*nx + zid*nx*ny]);
+    etah = fmax(etah, eta_x[xid + (yid + 1) * nx + zid * nx * ny]);
 
     etah = fmax(etah, eta_y[id]);
 
     etah_y[id] = etah;
-
   }
-
 }
 
-
-
-/*! \fn void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_z, int nx, int ny, int nz, int n_ghost)
+/*! \fn void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_z,
+ int nx, int ny, int nz, int n_ghost)
  *  \brief When passed the eta values at every interface, calculates
-           the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */
+           the eta_h value for the interface according to the forumulation in
+ Sanders et al, 1998. */
 __global__ void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_z, int nx, int ny, int nz, int n_ghost)
 {
-
   // get a thread ID
-  int tid = threadIdx.x + blockIdx.x*blockDim.x;
+  int tid = threadIdx.x + blockIdx.x * blockDim.x;
   int id;
-  int zid = tid / (nx*ny);
-  int yid = (tid - zid*nx*ny) / nx;
-  int xid = tid - zid*nx*ny - yid*nx;
+  int zid = tid / (nx * ny);
+  int yid = (tid - zid * nx * ny) / nx;
+  int xid = tid - zid * nx * ny - yid * nx;
 
   Real etah;
 
   // z-direction
-  if (zid > n_ghost-2 && zid < nz-n_ghost && xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost)
-  {
-    id = xid + yid*nx + zid*nx*ny;
+  if (zid > n_ghost - 2 && zid < nz - n_ghost && xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 &&
+      yid < ny - n_ghost) {
+    id = xid + yid * nx + zid * nx * ny;
 
-    etah = fmax(eta_x[xid-1 + yid*nx + zid*nx*ny], eta_x[xid-1 + yid*nx + (zid+1)*nx*ny]);
+    etah = fmax(eta_x[xid - 1 + yid * nx + zid * nx * ny], eta_x[xid - 1 + yid * nx + (zid + 1) * nx * ny]);
     etah = fmax(etah, eta_x[id]);
-    etah = fmax(etah, eta_x[xid + yid*nx + (zid+1)*nx*ny]);
+    etah = fmax(etah, eta_x[xid + yid * nx + (zid + 1) * nx * ny]);
 
-    etah = fmax(etah, eta_y[xid + (yid-1)*nx + zid*nx*ny]);
-    etah = fmax(etah, eta_y[xid + (yid-1)*nx + (zid+1)*nx*ny]);
+    etah = fmax(etah, eta_y[xid + (yid - 1) * nx + zid * nx * ny]);
+    etah = fmax(etah, eta_y[xid + (yid - 1) * nx + (zid + 1) * nx * ny]);
     etah = fmax(etah, eta_y[id]);
-    etah = fmax(etah, eta_y[xid + yid*nx + (zid+1)*nx*ny]);
+    etah = fmax(etah, eta_y[xid + yid * nx + (zid + 1) * nx * ny]);
 
     etah = fmax(etah, eta_z[id]);
 
     etah_z[id] = etah;
-
   }
-
 }
-
-
-#endif //CUDA
diff --git a/src/h_correction/h_correction_3D_cuda.h b/src/h_correction/h_correction_3D_cuda.h
index b22041423..c1d2f8a49 100644
--- a/src/h_correction/h_correction_3D_cuda.h
+++ b/src/h_correction/h_correction_3D_cuda.h
@@ -1,50 +1,59 @@
 /*! \file h_correction_3D_cuda.h
  *  \brief Functions declarations for the H correction kernels.
            Written following Sanders et al. 1998. */
-#ifdef CUDA
+
 #ifndef H_CORRECTION_3D_H
 #define H_CORRECTION_3D_H
 
-#include "../utils/gpu.hpp"
 #include "../global/global.h"
+#include "../utils/gpu.hpp"
 
-
-
-/*! \fn void calc_eta_x(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int nz, int n_ghost, Real gamma)
- *  \brief When passed the left and right boundary values at an interface, calculates
-           the eta value for the interface according to the forumulation in Sanders et al, 1998. */
-__global__ void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int nz, int n_ghost, Real gamma);
-
-
-/*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int nz, int n_ghost, Real gamma)
- *  \brief When passed the left and right boundary values at an interface, calculates
-           the eta value for the interface according to the forumulation in Sanders et al, 1998. */
-__global__ void calc_eta_y_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int nz, int n_ghost, Real gamma);
-
-
-/*! \fn void calc_eta_z(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int nx, int ny, int nz, int n_ghost, Real gamma)
- *  \brief When passed the left and right boundary values at an interface, calculates
-           the eta value for the interface according to the forumulation in Sanders et al, 1998. */
-__global__ void calc_eta_z_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int nx, int ny, int nz, int n_ghost, Real gamma);
-
-
-/*! \fn void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_x, int nx, int ny, int nz, int n_ghost)
+/*! \fn void calc_eta_x(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int
+ nx, int ny, int nz, int n_ghost, Real gamma)
+ *  \brief When passed the left and right boundary values at an interface,
+ calculates the eta value for the interface according to the forumulation in
+ Sanders et al, 1998. */
+__global__ void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int nz, int n_ghost,
+                              Real gamma);
+
+/*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int
+ nx, int ny, int nz, int n_ghost, Real gamma)
+ *  \brief When passed the left and right boundary values at an interface,
+ calculates the eta value for the interface according to the forumulation in
+ Sanders et al, 1998. */
+__global__ void calc_eta_y_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int nz, int n_ghost,
+                              Real gamma);
+
+/*! \fn void calc_eta_z(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int
+ nx, int ny, int nz, int n_ghost, Real gamma)
+ *  \brief When passed the left and right boundary values at an interface,
+ calculates the eta value for the interface according to the forumulation in
+ Sanders et al, 1998. */
+__global__ void calc_eta_z_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int nx, int ny, int nz, int n_ghost,
+                              Real gamma);
+
+/*! \fn void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_x,
+ int nx, int ny, int nz, int n_ghost)
  *  \brief When passed the eta values at every interface, calculates
-           the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */
-__global__ void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_x, int nx, int ny, int nz, int n_ghost);
+           the eta_h value for the interface according to the forumulation in
+ Sanders et al, 1998. */
+__global__ void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_x, int nx, int ny, int nz,
+                               int n_ghost);
 
-
-/*! \fn void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_y, int nx, int ny, int nz, int n_ghost)
+/*! \fn void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_y,
+ int nx, int ny, int nz, int n_ghost)
  *  \brief When passed the eta values at every interface, calculates
-           the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */
-__global__ void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_y, int nx, int ny, int nz, int n_ghost);
-
+           the eta_h value for the interface according to the forumulation in
+ Sanders et al, 1998. */
+__global__ void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_y, int nx, int ny, int nz,
+                               int n_ghost);
 
-/*! \fn void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_z, int nx, int ny, int nz, int n_ghost)
+/*! \fn void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_z,
+ int nx, int ny, int nz, int n_ghost)
  *  \brief When passed the eta values at every interface, calculates
-           the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */
-__global__ void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_z, int nx, int ny, int nz, int n_ghost);
-
+           the eta_h value for the interface according to the forumulation in
+ Sanders et al, 1998. */
+__global__ void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_z, int nx, int ny, int nz,
+                               int n_ghost);
 
-#endif //H_CORRECTION_3D_H
-#endif //CUDA
+#endif  // H_CORRECTION_3D_H
diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu
index ee033e334..4d0661fbd 100644
--- a/src/hydro/hydro_cuda.cu
+++ b/src/hydro/hydro_cuda.cu
@@ -1,153 +1,159 @@
 /*! \file hydro_cuda.cu
  *  \brief Definitions of functions used in all cuda integration algorithms. */
-#ifdef CUDA
 
-#include <stdio.h>
-#include <math.h>
 #include <float.h>
+#include <math.h>
+#include <stdio.h>
+
+#include <limits>
 
-#include "../utils/gpu.hpp"
 #include "../global/global.h"
 #include "../global/global_cuda.h"
-#include "../hydro/hydro_cuda.h"
 #include "../gravity/static_grav.h"
-#include "../utils/hydro_utilities.h"
+#include "../hydro/hydro_cuda.h"
+#include "../utils/DeviceVector.h"
 #include "../utils/cuda_utilities.h"
+#include "../utils/gpu.hpp"
+#include "../utils/hydro_utilities.h"
 #include "../utils/reduction_utilities.h"
 
-
-__global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, int n_cells, int x_off, int n_ghost, Real dx, Real xbound, Real dt, Real gamma, int n_fields)
+__global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, int n_cells, int x_off, int n_ghost,
+                                              Real dx, Real xbound, Real dt, Real gamma, int n_fields, int custom_grav)
 {
   int id;
-  #ifdef STATIC_GRAV
+#ifdef STATIC_GRAV
   Real d, d_inv, vx;
   Real gx, d_n, d_inv_n, vx_n;
   gx = 0.0;
-  #endif
+#endif
 
-  Real dtodx = dt/dx;
+  Real dtodx = dt / dx;
 
   // get a global thread ID
   id = threadIdx.x + blockIdx.x * blockDim.x;
 
-
   // threads corresponding to real cells do the calculation
-  if (id > n_ghost - 1 && id < n_cells-n_ghost)
-  {
-    #ifdef STATIC_GRAV
-    d  =  dev_conserved[            id];
+  if (id > n_ghost - 1 && id < n_cells - n_ghost) {
+#ifdef STATIC_GRAV
+    d     = dev_conserved[id];
     d_inv = 1.0 / d;
-    vx =  dev_conserved[1*n_cells + id] * d_inv;
-    #endif
+    vx    = dev_conserved[1 * n_cells + id] * d_inv;
+#endif
 
     // update the conserved variable array
-    dev_conserved[            id] += dtodx * (dev_F[            id-1] - dev_F[            id]);
-    dev_conserved[  n_cells + id] += dtodx * (dev_F[  n_cells + id-1] - dev_F[  n_cells + id]);
-    dev_conserved[2*n_cells + id] += dtodx * (dev_F[2*n_cells + id-1] - dev_F[2*n_cells + id]);
-    dev_conserved[3*n_cells + id] += dtodx * (dev_F[3*n_cells + id-1] - dev_F[3*n_cells + id]);
-    dev_conserved[4*n_cells + id] += dtodx * (dev_F[4*n_cells + id-1] - dev_F[4*n_cells + id]);
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_conserved[(5+i)*n_cells + id] += dtodx * (dev_F[(5+i)*n_cells + id-1] - dev_F[(5+i)*n_cells + id]);
+    dev_conserved[id] += dtodx * (dev_F[id - 1] - dev_F[id]);
+    dev_conserved[n_cells + id] += dtodx * (dev_F[n_cells + id - 1] - dev_F[n_cells + id]);
+    dev_conserved[2 * n_cells + id] += dtodx * (dev_F[2 * n_cells + id - 1] - dev_F[2 * n_cells + id]);
+    dev_conserved[3 * n_cells + id] += dtodx * (dev_F[3 * n_cells + id - 1] - dev_F[3 * n_cells + id]);
+    dev_conserved[4 * n_cells + id] += dtodx * (dev_F[4 * n_cells + id - 1] - dev_F[4 * n_cells + id]);
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dev_conserved[(5 + i) * n_cells + id] +=
+          dtodx * (dev_F[(5 + i) * n_cells + id - 1] - dev_F[(5 + i) * n_cells + id]);
     }
-    #endif
-    #ifdef DE
-    dev_conserved[(n_fields-1)*n_cells + id] += dtodx * (dev_F[(n_fields-1)*n_cells + id-1] - dev_F[(n_fields-1)*n_cells + id]);
-    #endif
-    #ifdef STATIC_GRAV // add gravitational source terms, time averaged from n to n+1
-    calc_g_1D(id, x_off, n_ghost, dx, xbound, &gx);
-    d_n  =  dev_conserved[            id];
+#endif
+#ifdef DE
+    dev_conserved[(n_fields - 1) * n_cells + id] +=
+        dtodx * (dev_F[(n_fields - 1) * n_cells + id - 1] - dev_F[(n_fields - 1) * n_cells + id]);
+#endif
+#ifdef STATIC_GRAV  // add gravitational source terms, time averaged from n to
+                    // n+1
+    calc_g_1D(id, x_off, n_ghost, custom_grav, dx, xbound, &gx);
+    d_n     = dev_conserved[id];
     d_inv_n = 1.0 / d_n;
-    vx_n =  dev_conserved[1*n_cells + id] * d_inv_n;
-    dev_conserved[  n_cells + id] += 0.5*dt*gx*(d + d_n);
-    dev_conserved[4*n_cells + id] += 0.25*dt*gx*(d + d_n)*(vx + vx_n);
-    #endif
-    if (dev_conserved[id] != dev_conserved[id]) printf("%3d Thread crashed in final update. %f\n", id, dev_conserved[id]);
+    vx_n    = dev_conserved[1 * n_cells + id] * d_inv_n;
+    dev_conserved[n_cells + id] += 0.5 * dt * gx * (d + d_n);
+    dev_conserved[4 * n_cells + id] += 0.25 * dt * gx * (d + d_n) * (vx + vx_n);
+#endif
+    if (dev_conserved[id] != dev_conserved[id]) {
+      printf("%3d Thread crashed in final update. %f\n", id, dev_conserved[id]);
+    }
     /*
     d  =  dev_conserved[            id];
     d_inv = 1.0 / d;
     vx =  dev_conserved[1*n_cells + id] * d_inv;
     vy =  dev_conserved[2*n_cells + id] * d_inv;
     vz =  dev_conserved[3*n_cells + id] * d_inv;
-    P  = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0);
-    if (P < 0.0) printf("%d Negative pressure after final update.\n", id);
+    P  = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) *
+    (gamma - 1.0); if (P < 0.0) printf("%d Negative pressure after final
+    update.\n", id);
     */
   }
-
-
 }
 
-
-__global__ void Update_Conserved_Variables_2D(Real *dev_conserved, Real *dev_F_x, Real *dev_F_y, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real dt, Real gamma, int n_fields)
+__global__ void Update_Conserved_Variables_2D(Real *dev_conserved, Real *dev_F_x, Real *dev_F_y, int nx, int ny,
+                                              int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound,
+                                              Real ybound, Real dt, Real gamma, int n_fields, int custom_grav)
 {
   int id, xid, yid, n_cells;
   int imo, jmo;
 
-  #ifdef STATIC_GRAV
+#ifdef STATIC_GRAV
   Real d, d_inv, vx, vy;
   Real gx, gy, d_n, d_inv_n, vx_n, vy_n;
   gx = 0.0;
   gy = 0.0;
-  #endif
+#endif
 
-  Real dtodx = dt/dx;
-  Real dtody = dt/dy;
+  Real dtodx = dt / dx;
+  Real dtody = dt / dy;
 
-  n_cells = nx*ny;
+  n_cells = nx * ny;
 
   // get a global thread ID
-  int blockId = blockIdx.x + blockIdx.y*gridDim.x;
-  id = threadIdx.x + blockId * blockDim.x;
-  yid = id / nx;
-  xid = id - yid*nx;
-  imo = xid-1 + yid*nx;
-  jmo = xid + (yid-1)*nx;
+  int blockId = blockIdx.x + blockIdx.y * gridDim.x;
+  id          = threadIdx.x + blockId * blockDim.x;
+  yid         = id / nx;
+  xid         = id - yid * nx;
+  imo         = xid - 1 + yid * nx;
+  jmo         = xid + (yid - 1) * nx;
 
   // threads corresponding to real cells do the calculation
-  if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost)
-  {
-    #ifdef STATIC_GRAV
-    d  =  dev_conserved[            id];
+  if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost) {
+#ifdef STATIC_GRAV
+    d     = dev_conserved[id];
     d_inv = 1.0 / d;
-    vx =  dev_conserved[1*n_cells + id] * d_inv;
-    vy =  dev_conserved[2*n_cells + id] * d_inv;
-    #endif
+    vx    = dev_conserved[1 * n_cells + id] * d_inv;
+    vy    = dev_conserved[2 * n_cells + id] * d_inv;
+#endif
     // update the conserved variable array
-    dev_conserved[            id] += dtodx * (dev_F_x[            imo] - dev_F_x[            id])
-                                  +  dtody * (dev_F_y[            jmo] - dev_F_y[            id]);
-    dev_conserved[  n_cells + id] += dtodx * (dev_F_x[  n_cells + imo] - dev_F_x[  n_cells + id])
-                                  +  dtody * (dev_F_y[  n_cells + jmo] - dev_F_y[  n_cells + id]);
-    dev_conserved[2*n_cells + id] += dtodx * (dev_F_x[2*n_cells + imo] - dev_F_x[2*n_cells + id])
-                                  +  dtody * (dev_F_y[2*n_cells + jmo] - dev_F_y[2*n_cells + id]);
-    dev_conserved[3*n_cells + id] += dtodx * (dev_F_x[3*n_cells + imo] - dev_F_x[3*n_cells + id])
-                                  +  dtody * (dev_F_y[3*n_cells + jmo] - dev_F_y[3*n_cells + id]);
-    dev_conserved[4*n_cells + id] += dtodx * (dev_F_x[4*n_cells + imo] - dev_F_x[4*n_cells + id])
-                                  +  dtody * (dev_F_y[4*n_cells + jmo] - dev_F_y[4*n_cells + id]);
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_conserved[(5+i)*n_cells + id] += dtodx * (dev_F_x[(5+i)*n_cells + imo] - dev_F_x[(5+i)*n_cells + id])
-                                        +  dtody * (dev_F_y[(5+i)*n_cells + jmo] - dev_F_y[(5+i)*n_cells + id]);
+    dev_conserved[id] += dtodx * (dev_F_x[imo] - dev_F_x[id]) + dtody * (dev_F_y[jmo] - dev_F_y[id]);
+    dev_conserved[n_cells + id] += dtodx * (dev_F_x[n_cells + imo] - dev_F_x[n_cells + id]) +
+                                   dtody * (dev_F_y[n_cells + jmo] - dev_F_y[n_cells + id]);
+    dev_conserved[2 * n_cells + id] += dtodx * (dev_F_x[2 * n_cells + imo] - dev_F_x[2 * n_cells + id]) +
+                                       dtody * (dev_F_y[2 * n_cells + jmo] - dev_F_y[2 * n_cells + id]);
+    dev_conserved[3 * n_cells + id] += dtodx * (dev_F_x[3 * n_cells + imo] - dev_F_x[3 * n_cells + id]) +
+                                       dtody * (dev_F_y[3 * n_cells + jmo] - dev_F_y[3 * n_cells + id]);
+    dev_conserved[4 * n_cells + id] += dtodx * (dev_F_x[4 * n_cells + imo] - dev_F_x[4 * n_cells + id]) +
+                                       dtody * (dev_F_y[4 * n_cells + jmo] - dev_F_y[4 * n_cells + id]);
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dev_conserved[(5 + i) * n_cells + id] +=
+          dtodx * (dev_F_x[(5 + i) * n_cells + imo] - dev_F_x[(5 + i) * n_cells + id]) +
+          dtody * (dev_F_y[(5 + i) * n_cells + jmo] - dev_F_y[(5 + i) * n_cells + id]);
     }
-    #endif
-    #ifdef DE
-    dev_conserved[(n_fields-1)*n_cells + id] += dtodx * (dev_F_x[(n_fields-1)*n_cells + imo] - dev_F_x[(n_fields-1)*n_cells + id])
-                                  +  dtody * (dev_F_y[(n_fields-1)*n_cells + jmo] - dev_F_y[(n_fields-1)*n_cells + id]);
-    #endif
-    #ifdef STATIC_GRAV
+#endif
+#ifdef DE
+    dev_conserved[(n_fields - 1) * n_cells + id] +=
+        dtodx * (dev_F_x[(n_fields - 1) * n_cells + imo] - dev_F_x[(n_fields - 1) * n_cells + id]) +
+        dtody * (dev_F_y[(n_fields - 1) * n_cells + jmo] - dev_F_y[(n_fields - 1) * n_cells + id]);
+#endif
+#ifdef STATIC_GRAV
     // calculate the gravitational acceleration as a function of x & y position
-    calc_g_2D(xid, yid, x_off, y_off, n_ghost, dx, dy, xbound, ybound, &gx, &gy);
+    calc_g_2D(xid, yid, x_off, y_off, n_ghost, custom_grav, dx, dy, xbound, ybound, &gx, &gy);
     // add gravitational source terms, time averaged from n to n+1
-    d_n  =  dev_conserved[            id];
+    d_n     = dev_conserved[id];
     d_inv_n = 1.0 / d_n;
-    vx_n =  dev_conserved[1*n_cells + id] * d_inv_n;
-    vy_n =  dev_conserved[2*n_cells + id] * d_inv_n;
-    dev_conserved[  n_cells + id] += 0.5*dt*gx*(d + d_n);
-    dev_conserved[2*n_cells + id] += 0.5*dt*gy*(d + d_n);
-    dev_conserved[4*n_cells + id] += 0.25*dt*gx*(d + d_n)*(vx + vx_n)
-                                  +  0.25*dt*gy*(d + d_n)*(vy + vy_n);
-    #endif
+    vx_n    = dev_conserved[1 * n_cells + id] * d_inv_n;
+    vy_n    = dev_conserved[2 * n_cells + id] * d_inv_n;
+    dev_conserved[n_cells + id] += 0.5 * dt * gx * (d + d_n);
+    dev_conserved[2 * n_cells + id] += 0.5 * dt * gy * (d + d_n);
+    dev_conserved[4 * n_cells + id] +=
+        0.25 * dt * gx * (d + d_n) * (vx + vx_n) + 0.25 * dt * gy * (d + d_n) * (vy + vy_n);
+#endif
     if (dev_conserved[id] < 0.0 || dev_conserved[id] != dev_conserved[id]) {
-      printf("%3d %3d Thread crashed in final update. %f %f %f\n", xid, yid, dtodx*(dev_F_x[imo]-dev_F_x[id]), dtody*(dev_F_y[jmo]-dev_F_y[id]), dev_conserved[id]);
+      printf("%3d %3d Thread crashed in final update. %f %f %f\n", xid, yid, dtodx * (dev_F_x[imo] - dev_F_x[id]),
+             dtody * (dev_F_y[jmo] - dev_F_y[id]), dev_conserved[id]);
     }
     /*
     d  =  dev_conserved[            id];
@@ -155,39 +161,37 @@ __global__ void Update_Conserved_Variables_2D(Real *dev_conserved, Real *dev_F_x
     vx =  dev_conserved[1*n_cells + id] * d_inv;
     vy =  dev_conserved[2*n_cells + id] * d_inv;
     vz =  dev_conserved[3*n_cells + id] * d_inv;
-    P  = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0);
-    if (P < 0.0)
-      printf("%3d %3d Negative pressure after final update. %f %f %f %f\n", xid, yid, dev_conserved[4*n_cells + id], 0.5*d*vx*vx, 0.5*d*vy*vy, P);
+    P  = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) *
+    (gamma - 1.0); if (P < 0.0) printf("%3d %3d Negative pressure after final
+    update. %f %f %f %f\n", xid, yid, dev_conserved[4*n_cells + id],
+    0.5*d*vx*vx, 0.5*d*vy*vy, P);
     */
   }
-
 }
 
-
-
-__global__ void Update_Conserved_Variables_3D(Real *dev_conserved,
-                                              Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, Real *Q_Lz, Real *Q_Rz,
-                                              Real *dev_F_x, Real *dev_F_y,  Real *dev_F_z,
+__global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry,
+                                              Real *Q_Lz, Real *Q_Rz, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z,
                                               int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost,
                                               Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt,
-                                              Real gamma, int n_fields, Real density_floor, Real *dev_potential )
+                                              Real gamma, int n_fields, int custom_grav, Real density_floor,
+                                              Real *dev_potential)
 {
   int id, xid, yid, zid, n_cells;
   int imo, jmo, kmo;
 
-  #ifdef STATIC_GRAV
+#ifdef STATIC_GRAV
   Real d, d_inv, vx, vy, vz;
   Real gx, gy, gz, d_n, d_inv_n, vx_n, vy_n, vz_n;
   gx = 0.0;
   gy = 0.0;
   gz = 0.0;
-  #endif
+#endif
 
-  #ifdef DENSITY_FLOOR
+#ifdef DENSITY_FLOOR
   Real dens_0;
-  #endif
+#endif
 
-  #ifdef GRAVITY
+#ifdef GRAVITY
   Real d, d_inv, vx, vy, vz;
   Real gx, gy, gz, d_n, d_inv_n, vx_n, vy_n, vz_n;
   Real pot_l, pot_r;
@@ -201,243 +205,235 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved,
   Real pot_ll, pot_rr;
   #endif
 
-  #endif //GRAVITY
+#endif  // GRAVITY
 
-  Real dtodx = dt/dx;
-  Real dtody = dt/dy;
-  Real dtodz = dt/dz;
-  n_cells = nx*ny*nz;
+  Real dtodx = dt / dx;
+  Real dtody = dt / dy;
+  Real dtodz = dt / dz;
+  n_cells    = nx * ny * nz;
 
   // get a global thread ID
-  id = threadIdx.x + blockIdx.x * blockDim.x;
-  zid = id / (nx*ny);
-  yid = (id - zid*nx*ny) / nx;
-  xid = id - zid*nx*ny - yid*nx;
-  imo = xid-1 + yid*nx + zid*nx*ny;
-  jmo = xid + (yid-1)*nx + zid*nx*ny;
-  kmo = xid + yid*nx + (zid-1)*nx*ny;
+  id  = threadIdx.x + blockIdx.x * blockDim.x;
+  zid = id / (nx * ny);
+  yid = (id - zid * nx * ny) / nx;
+  xid = id - zid * nx * ny - yid * nx;
+  imo = xid - 1 + yid * nx + zid * nx * ny;
+  jmo = xid + (yid - 1) * nx + zid * nx * ny;
+  kmo = xid + yid * nx + (zid - 1) * nx * ny;
 
   // threads corresponding to real cells do the calculation
-  if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost)
-  {
-    #if defined(STATIC_GRAV) ||  defined(GRAVITY)
-    d  =  dev_conserved[            id];
+  if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 &&
+      zid < nz - n_ghost) {
+#if defined(STATIC_GRAV) || defined(GRAVITY)
+    d     = dev_conserved[id];
     d_inv = 1.0 / d;
-    vx =  dev_conserved[1*n_cells + id] * d_inv;
-    vy =  dev_conserved[2*n_cells + id] * d_inv;
-    vz =  dev_conserved[3*n_cells + id] * d_inv;
-    #endif
+    vx    = dev_conserved[1 * n_cells + id] * d_inv;
+    vy    = dev_conserved[2 * n_cells + id] * d_inv;
+    vz    = dev_conserved[3 * n_cells + id] * d_inv;
+#endif
 
     // update the conserved variable array
-    dev_conserved[            id] += dtodx * (dev_F_x[            imo] - dev_F_x[            id])
-                                  +  dtody * (dev_F_y[            jmo] - dev_F_y[            id])
-                                  +  dtodz * (dev_F_z[            kmo] - dev_F_z[            id]);
-    dev_conserved[  n_cells + id] += dtodx * (dev_F_x[  n_cells + imo] - dev_F_x[  n_cells + id])
-                                  +  dtody * (dev_F_y[  n_cells + jmo] - dev_F_y[  n_cells + id])
-                                  +  dtodz * (dev_F_z[  n_cells + kmo] - dev_F_z[  n_cells + id]);
-    dev_conserved[2*n_cells + id] += dtodx * (dev_F_x[2*n_cells + imo] - dev_F_x[2*n_cells + id])
-                                  +  dtody * (dev_F_y[2*n_cells + jmo] - dev_F_y[2*n_cells + id])
-                                  +  dtodz * (dev_F_z[2*n_cells + kmo] - dev_F_z[2*n_cells + id]);
-    dev_conserved[3*n_cells + id] += dtodx * (dev_F_x[3*n_cells + imo] - dev_F_x[3*n_cells + id])
-                                  +  dtody * (dev_F_y[3*n_cells + jmo] - dev_F_y[3*n_cells + id])
-                                  +  dtodz * (dev_F_z[3*n_cells + kmo] - dev_F_z[3*n_cells + id]);
-    dev_conserved[4*n_cells + id] += dtodx * (dev_F_x[4*n_cells + imo] - dev_F_x[4*n_cells + id])
-                                  +  dtody * (dev_F_y[4*n_cells + jmo] - dev_F_y[4*n_cells + id])
-                                  +  dtodz * (dev_F_z[4*n_cells + kmo] - dev_F_z[4*n_cells + id]);
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_conserved[(5+i)*n_cells + id] += dtodx * (dev_F_x[(5+i)*n_cells + imo] - dev_F_x[(5+i)*n_cells + id])
-                                    +  dtody * (dev_F_y[(5+i)*n_cells + jmo] - dev_F_y[(5+i)*n_cells + id])
-                                    +  dtodz * (dev_F_z[(5+i)*n_cells + kmo] - dev_F_z[(5+i)*n_cells + id]);
-      #ifdef COOLING_GRACKLE
-      // If the updated value is negative, then revert to the value before the update
-      if ( dev_conserved[(5+i)*n_cells + id] < 0 ){
-        dev_conserved[(5+i)*n_cells + id] -= dtodx * (dev_F_x[(5+i)*n_cells + imo] - dev_F_x[(5+i)*n_cells + id])
-                                      +  dtody * (dev_F_y[(5+i)*n_cells + jmo] - dev_F_y[(5+i)*n_cells + id])
-                                      +  dtodz * (dev_F_z[(5+i)*n_cells + kmo] - dev_F_z[(5+i)*n_cells + id]);
+    dev_conserved[id] += dtodx * (dev_F_x[imo] - dev_F_x[id]) + dtody * (dev_F_y[jmo] - dev_F_y[id]) +
+                         dtodz * (dev_F_z[kmo] - dev_F_z[id]);
+    dev_conserved[n_cells + id] += dtodx * (dev_F_x[n_cells + imo] - dev_F_x[n_cells + id]) +
+                                   dtody * (dev_F_y[n_cells + jmo] - dev_F_y[n_cells + id]) +
+                                   dtodz * (dev_F_z[n_cells + kmo] - dev_F_z[n_cells + id]);
+    dev_conserved[2 * n_cells + id] += dtodx * (dev_F_x[2 * n_cells + imo] - dev_F_x[2 * n_cells + id]) +
+                                       dtody * (dev_F_y[2 * n_cells + jmo] - dev_F_y[2 * n_cells + id]) +
+                                       dtodz * (dev_F_z[2 * n_cells + kmo] - dev_F_z[2 * n_cells + id]);
+    dev_conserved[3 * n_cells + id] += dtodx * (dev_F_x[3 * n_cells + imo] - dev_F_x[3 * n_cells + id]) +
+                                       dtody * (dev_F_y[3 * n_cells + jmo] - dev_F_y[3 * n_cells + id]) +
+                                       dtodz * (dev_F_z[3 * n_cells + kmo] - dev_F_z[3 * n_cells + id]);
+    dev_conserved[4 * n_cells + id] += dtodx * (dev_F_x[4 * n_cells + imo] - dev_F_x[4 * n_cells + id]) +
+                                       dtody * (dev_F_y[4 * n_cells + jmo] - dev_F_y[4 * n_cells + id]) +
+                                       dtodz * (dev_F_z[4 * n_cells + kmo] - dev_F_z[4 * n_cells + id]);
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dev_conserved[(5 + i) * n_cells + id] +=
+          dtodx * (dev_F_x[(5 + i) * n_cells + imo] - dev_F_x[(5 + i) * n_cells + id]) +
+          dtody * (dev_F_y[(5 + i) * n_cells + jmo] - dev_F_y[(5 + i) * n_cells + id]) +
+          dtodz * (dev_F_z[(5 + i) * n_cells + kmo] - dev_F_z[(5 + i) * n_cells + id]);
+  #ifdef COOLING_GRACKLE
+      // If the updated value is negative, then revert to the value before the
+      // update
+      if (dev_conserved[(5 + i) * n_cells + id] < 0) {
+        dev_conserved[(5 + i) * n_cells + id] -=
+            dtodx * (dev_F_x[(5 + i) * n_cells + imo] - dev_F_x[(5 + i) * n_cells + id]) +
+            dtody * (dev_F_y[(5 + i) * n_cells + jmo] - dev_F_y[(5 + i) * n_cells + id]) +
+            dtodz * (dev_F_z[(5 + i) * n_cells + kmo] - dev_F_z[(5 + i) * n_cells + id]);
       }
-      #endif
+  #endif
     }
-    #endif
-    #ifdef DE
-    dev_conserved[(n_fields-1)*n_cells + id] += dtodx * (dev_F_x[(n_fields-1)*n_cells + imo] - dev_F_x[(n_fields-1)*n_cells + id])
-                                  +  dtody * (dev_F_y[(n_fields-1)*n_cells + jmo] - dev_F_y[(n_fields-1)*n_cells + id])
-                                  +  dtodz * (dev_F_z[(n_fields-1)*n_cells + kmo] - dev_F_z[(n_fields-1)*n_cells + id]);
-                                  // +  0.5*P*(dtodx*(vx_imo-vx_ipo) + dtody*(vy_jmo-vy_jpo) + dtodz*(vz_kmo-vz_kpo));
-                                  //Note: this term is added in a separate kernel to avoid synchronization issues
-    #endif
-
-    #ifdef DENSITY_FLOOR
-    if ( dev_conserved[            id] < density_floor ){
-      if (dev_conserved[            id] > 0){
-        dens_0 = dev_conserved[            id];
+#endif
+#ifdef DE
+    dev_conserved[(n_fields - 1) * n_cells + id] +=
+        dtodx * (dev_F_x[(n_fields - 1) * n_cells + imo] - dev_F_x[(n_fields - 1) * n_cells + id]) +
+        dtody * (dev_F_y[(n_fields - 1) * n_cells + jmo] - dev_F_y[(n_fields - 1) * n_cells + id]) +
+        dtodz * (dev_F_z[(n_fields - 1) * n_cells + kmo] - dev_F_z[(n_fields - 1) * n_cells + id]);
+    // +  0.5*P*(dtodx*(vx_imo-vx_ipo) + dtody*(vy_jmo-vy_jpo) +
+    // dtodz*(vz_kmo-vz_kpo));
+    // Note: this term is added in a separate kernel to avoid synchronization
+    // issues
+#endif
+
+#ifdef DENSITY_FLOOR
+    if (dev_conserved[id] < density_floor) {
+      if (dev_conserved[id] > 0) {
+        dens_0 = dev_conserved[id];
         // Set the density to the density floor
-        dev_conserved[            id] = density_floor;
+        dev_conserved[id] = density_floor;
         // Scale the conserved values to the new density
-        dev_conserved[1*n_cells + id] *= (density_floor / dens_0);
-        dev_conserved[2*n_cells + id] *= (density_floor / dens_0);
-        dev_conserved[3*n_cells + id] *= (density_floor / dens_0);
-        dev_conserved[4*n_cells + id] *= (density_floor / dens_0);
-        #ifdef DE
-        dev_conserved[(n_fields-1)*n_cells + id] *= (density_floor / dens_0);
-        #endif
-      }
-      else{
+        dev_conserved[1 * n_cells + id] *= (density_floor / dens_0);
+        dev_conserved[2 * n_cells + id] *= (density_floor / dens_0);
+        dev_conserved[3 * n_cells + id] *= (density_floor / dens_0);
+        dev_conserved[4 * n_cells + id] *= (density_floor / dens_0);
+  #ifdef DE
+        dev_conserved[(n_fields - 1) * n_cells + id] *= (density_floor / dens_0);
+  #endif
+      } else {
         // If the density is negative: average the density on that cell
-        dens_0 = dev_conserved[            id];
-        Average_Cell_Single_Field( 0, xid, yid, zid, nx, ny, nz, n_cells, dev_conserved );
+        dens_0 = dev_conserved[id];
+        Average_Cell_Single_Field(0, xid, yid, zid, nx, ny, nz, n_cells, dev_conserved);
       }
     }
-    #endif//DENSITY_FLOOR
+#endif  // DENSITY_FLOOR
 
-    #ifdef STATIC_GRAV
-    calc_g_3D(xid, yid, zid, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, zbound, &gx, &gy, &gz);
-    d_n  =  dev_conserved[            id];
+#ifdef STATIC_GRAV
+    calc_g_3D(xid, yid, zid, x_off, y_off, z_off, n_ghost, custom_grav, dx, dy, dz, xbound, ybound, zbound, &gx, &gy,
+              &gz);
+    d_n     = dev_conserved[id];
     d_inv_n = 1.0 / d_n;
-    vx_n =  dev_conserved[1*n_cells + id] * d_inv_n;
-    vy_n =  dev_conserved[2*n_cells + id] * d_inv_n;
-    vz_n =  dev_conserved[3*n_cells + id] * d_inv_n;
-    dev_conserved[  n_cells + id] += 0.5*dt*gx*(d + d_n);
-    dev_conserved[2*n_cells + id] += 0.5*dt*gy*(d + d_n);
-    dev_conserved[3*n_cells + id] += 0.5*dt*gz*(d + d_n);
-    dev_conserved[4*n_cells + id] += 0.25*dt*gx*(d + d_n)*(vx + vx_n)
-                                  +  0.25*dt*gy*(d + d_n)*(vy + vy_n)
-                                  +  0.25*dt*gz*(d + d_n)*(vz + vz_n);
-    #endif
-
-    #ifdef GRAVITY
-    d_n  =  dev_conserved[            id];
+    vx_n    = dev_conserved[1 * n_cells + id] * d_inv_n;
+    vy_n    = dev_conserved[2 * n_cells + id] * d_inv_n;
+    vz_n    = dev_conserved[3 * n_cells + id] * d_inv_n;
+    dev_conserved[n_cells + id] += 0.5 * dt * gx * (d + d_n);
+    dev_conserved[2 * n_cells + id] += 0.5 * dt * gy * (d + d_n);
+    dev_conserved[3 * n_cells + id] += 0.5 * dt * gz * (d + d_n);
+    dev_conserved[4 * n_cells + id] += 0.25 * dt * gx * (d + d_n) * (vx + vx_n) +
+                                       0.25 * dt * gy * (d + d_n) * (vy + vy_n) +
+                                       0.25 * dt * gz * (d + d_n) * (vz + vz_n);
+#endif
+
+#ifdef GRAVITY
+    d_n     = dev_conserved[id];
     d_inv_n = 1.0 / d_n;
-    vx_n =  dev_conserved[1*n_cells + id] * d_inv_n;
-    vy_n =  dev_conserved[2*n_cells + id] * d_inv_n;
-    vz_n =  dev_conserved[3*n_cells + id] * d_inv_n;
+    vx_n    = dev_conserved[1 * n_cells + id] * d_inv_n;
+    vy_n    = dev_conserved[2 * n_cells + id] * d_inv_n;
+    vz_n    = dev_conserved[3 * n_cells + id] * d_inv_n;
 
     // Calculate the -gradient of potential
     // Get X componet of gravity field
-    id_l = (xid-1) + (yid)*nx + (zid)*nx*ny;
-    id_r = (xid+1) + (yid)*nx + (zid)*nx*ny;
+    id_l  = (xid - 1) + (yid)*nx + (zid)*nx * ny;
+    id_r  = (xid + 1) + (yid)*nx + (zid)*nx * ny;
     pot_l = dev_potential[id_l];
     pot_r = dev_potential[id_r];
-    #ifdef GRAVITY_5_POINTS_GRADIENT
-    id_ll = (xid-2) + (yid)*nx + (zid)*nx*ny;
-    id_rr = (xid+2) + (yid)*nx + (zid)*nx*ny;
+  #ifdef GRAVITY_5_POINTS_GRADIENT
+    id_ll  = (xid - 2) + (yid)*nx + (zid)*nx * ny;
+    id_rr  = (xid + 2) + (yid)*nx + (zid)*nx * ny;
     pot_ll = dev_potential[id_ll];
     pot_rr = dev_potential[id_rr];
-    gx = -1 * ( -pot_rr + 8*pot_r - 8*pot_l + pot_ll) / (12*dx);
-    #else
-    gx = -0.5*( pot_r - pot_l ) / dx;
-    #endif
-
-    //Get Y componet of gravity field
-    id_l = (xid) + (yid-1)*nx + (zid)*nx*ny;
-    id_r = (xid) + (yid+1)*nx + (zid)*nx*ny;
+    gx     = -1 * (-pot_rr + 8 * pot_r - 8 * pot_l + pot_ll) / (12 * dx);
+  #else
+    gx = -0.5 * (pot_r - pot_l) / dx;
+  #endif
+
+    // Get Y componet of gravity field
+    id_l  = (xid) + (yid - 1) * nx + (zid)*nx * ny;
+    id_r  = (xid) + (yid + 1) * nx + (zid)*nx * ny;
     pot_l = dev_potential[id_l];
     pot_r = dev_potential[id_r];
-    #ifdef GRAVITY_5_POINTS_GRADIENT
-    id_ll = (xid) + (yid-2)*nx + (zid)*nx*ny;
-    id_rr = (xid) + (yid+2)*nx + (zid)*nx*ny;
+  #ifdef GRAVITY_5_POINTS_GRADIENT
+    id_ll  = (xid) + (yid - 2) * nx + (zid)*nx * ny;
+    id_rr  = (xid) + (yid + 2) * nx + (zid)*nx * ny;
     pot_ll = dev_potential[id_ll];
     pot_rr = dev_potential[id_rr];
-    gy = -1 * ( -pot_rr + 8*pot_r - 8*pot_l + pot_ll) / (12*dx);
-    #else
-    gy = -0.5*( pot_r - pot_l ) / dy;
-    #endif
-    //Get Z componet of gravity field
-    id_l = (xid) + (yid)*nx + (zid-1)*nx*ny;
-    id_r = (xid) + (yid)*nx + (zid+1)*nx*ny;
+    gy     = -1 * (-pot_rr + 8 * pot_r - 8 * pot_l + pot_ll) / (12 * dx);
+  #else
+    gy = -0.5 * (pot_r - pot_l) / dy;
+  #endif
+    // Get Z componet of gravity field
+    id_l  = (xid) + (yid)*nx + (zid - 1) * nx * ny;
+    id_r  = (xid) + (yid)*nx + (zid + 1) * nx * ny;
     pot_l = dev_potential[id_l];
     pot_r = dev_potential[id_r];
-    #ifdef GRAVITY_5_POINTS_GRADIENT
-    id_ll = (xid) + (yid)*nx + (zid-2)*nx*ny;
-    id_rr = (xid) + (yid)*nx + (zid+2)*nx*ny;
+  #ifdef GRAVITY_5_POINTS_GRADIENT
+    id_ll  = (xid) + (yid)*nx + (zid - 2) * nx * ny;
+    id_rr  = (xid) + (yid)*nx + (zid + 2) * nx * ny;
     pot_ll = dev_potential[id_ll];
     pot_rr = dev_potential[id_rr];
-    gz = -1 * ( -pot_rr + 8*pot_r - 8*pot_l + pot_ll) / (12*dx);
-    #else
-    gz = -0.5*( pot_r - pot_l ) / dz;
-    #endif
-
-    //Add gravity term to Momentum
-    dev_conserved[  n_cells + id] += 0.5*dt*gx*(d + d_n);
-    dev_conserved[2*n_cells + id] += 0.5*dt*gy*(d + d_n);
-    dev_conserved[3*n_cells + id] += 0.5*dt*gz*(d + d_n);
-
-    //Add gravity term to Total Energy
-    //Add the work done by the gravitational force
-    dev_conserved[4*n_cells + id] += 0.5* dt * ( gx*(d*vx + d_n*vx_n) +  gy*(d*vy + d_n*vy_n) +  gz*(d*vz + d_n*vz_n) );
-
-    #endif
-
+    gz     = -1 * (-pot_rr + 8 * pot_r - 8 * pot_l + pot_ll) / (12 * dx);
+  #else
+    gz = -0.5 * (pot_r - pot_l) / dz;
+  #endif
 
-    #if !( defined(DENSITY_FLOOR) && defined(TEMPERATURE_FLOOR) )
-    if (dev_conserved[id] < 0.0 || dev_conserved[id] != dev_conserved[id] || dev_conserved[4*n_cells + id] < 0.0 || dev_conserved[4*n_cells+id] != dev_conserved[4*n_cells+id]) {
-      printf("%3d %3d %3d Thread crashed in final update. %e %e %e %e %e\n", xid+x_off, yid+y_off, zid+z_off, dev_conserved[id], dtodx*(dev_F_x[imo]-dev_F_x[id]), dtody*(dev_F_y[jmo]-dev_F_y[id]), dtodz*(dev_F_z[kmo]-dev_F_z[id]), dev_conserved[4*n_cells+id]);
+    // Add gravity term to Momentum
+    dev_conserved[n_cells + id] += 0.5 * dt * gx * (d + d_n);
+    dev_conserved[2 * n_cells + id] += 0.5 * dt * gy * (d + d_n);
+    dev_conserved[3 * n_cells + id] += 0.5 * dt * gz * (d + d_n);
+
+    // Add gravity term to Total Energy
+    // Add the work done by the gravitational force
+    dev_conserved[4 * n_cells + id] +=
+        0.5 * dt * (gx * (d * vx + d_n * vx_n) + gy * (d * vy + d_n * vy_n) + gz * (d * vz + d_n * vz_n));
+
+#endif  // GRAVITY
+
+#if !(defined(DENSITY_FLOOR) && defined(TEMPERATURE_FLOOR))
+    if (dev_conserved[id] < 0.0 || dev_conserved[id] != dev_conserved[id] || dev_conserved[4 * n_cells + id] < 0.0 ||
+        dev_conserved[4 * n_cells + id] != dev_conserved[4 * n_cells + id]) {
+      printf("%3d %3d %3d Thread crashed in final update. %e %e %e %e %e\n", xid + x_off, yid + y_off, zid + z_off,
+             dev_conserved[id], dtodx * (dev_F_x[imo] - dev_F_x[id]), dtody * (dev_F_y[jmo] - dev_F_y[id]),
+             dtodz * (dev_F_z[kmo] - dev_F_z[id]), dev_conserved[4 * n_cells + id]);
+      Average_Cell_All_Fields(xid, yid, zid, nx, ny, nz, n_cells, n_fields, gamma, dev_conserved);
     }
-    #endif//DENSITY_FLOOR
+#endif  // DENSITY_FLOOR
     /*
     d  =  dev_conserved[            id];
     d_inv = 1.0 / d;
     vx =  dev_conserved[1*n_cells + id] * d_inv;
     vy =  dev_conserved[2*n_cells + id] * d_inv;
     vz =  dev_conserved[3*n_cells + id] * d_inv;
-    P  = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0);
-    if (P < 0.0) printf("%3d %3d %3d Negative pressure after final update. %f %f %f %f %f\n", xid, yid, zid, dev_conserved[4*n_cells + id], 0.5*d*vx*vx, 0.5*d*vy*vy, 0.5*d*vz*vz, P);
+    P  = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) *
+    (gamma - 1.0); if (P < 0.0) printf("%3d %3d %3d Negative pressure after
+    final update. %f %f %f %f %f\n", xid, yid, zid, dev_conserved[4*n_cells +
+    id], 0.5*d*vx*vx, 0.5*d*vy*vy, 0.5*d*vz*vz, P);
     */
   }
-
 }
 
- __device__ __host__ Real hydroInverseCrossingTime(Real const &E,
-                                                   Real const &d,
-                                                   Real const &d_inv,
-                                                   Real const &vx,
-                                                   Real const &vy,
-                                                   Real const &vz,
-                                                   Real const &dx,
-                                                   Real const &dy,
-                                                   Real const &dz,
-                                                   Real const &gamma)
+__device__ __host__ Real hydroInverseCrossingTime(Real const &E, Real const &d, Real const &d_inv, Real const &vx,
+                                                  Real const &vy, Real const &vz, Real const &dx, Real const &dy,
+                                                  Real const &dz, Real const &gamma)
 {
   // Compute pressure and sound speed
-  Real P  = (E - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0);
+  Real P  = (E - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0);
   Real cs = sqrt(d_inv * gamma * P);
 
   // Find maximum inverse crossing time in the cell (i.e. minimum crossing time)
-  Real cellMaxInverseDt = fmax((fabs(vx)+cs)/dx, (fabs(vy)+cs)/dy);
-  cellMaxInverseDt      = fmax(cellMaxInverseDt, (fabs(vz)+cs)/dz);
+  Real cellMaxInverseDt = fmax((fabs(vx) + cs) / dx, (fabs(vy) + cs) / dy);
+  cellMaxInverseDt      = fmax(cellMaxInverseDt, (fabs(vz) + cs) / dz);
   cellMaxInverseDt      = fmax(cellMaxInverseDt, 0.0);
 
   return cellMaxInverseDt;
 }
 
-__device__ __host__ Real mhdInverseCrossingTime(Real const &E,
-                                                Real const &d,
-                                                Real const &d_inv,
-                                                Real const &vx,
-                                                Real const &vy,
-                                                Real const &vz,
-                                                Real const &avgBx,
-                                                Real const &avgBy,
-                                                Real const &avgBz,
-                                                Real const &dx,
-                                                Real const &dy,
-                                                Real const &dz,
+__device__ __host__ Real mhdInverseCrossingTime(Real const &E, Real const &d, Real const &d_inv, Real const &vx,
+                                                Real const &vy, Real const &vz, Real const &avgBx, Real const &avgBy,
+                                                Real const &avgBz, Real const &dx, Real const &dy, Real const &dz,
                                                 Real const &gamma)
 {
   // Compute the gas pressure and fast magnetosonic speed
-  Real gasP = mhdUtils::computeGasPressure(E, d, vx*d, vy*d, vz*d, avgBx, avgBy, avgBz, gamma);
-  Real cf   = mhdUtils::fastMagnetosonicSpeed(d, gasP, avgBx, avgBy, avgBz, gamma);
+  Real gasP = hydro_utilities::Calc_Pressure_Primitive(E, d, vx, vy, vz, gamma, avgBx, avgBy, avgBz);
+  Real cf   = mhd::utils::fastMagnetosonicSpeed(d, gasP, avgBx, avgBy, avgBz, gamma);
 
   // Find maximum inverse crossing time in the cell (i.e. minimum crossing time)
-  Real cellMaxInverseDt = fmax((fabs(vx)+cf)/dx, (fabs(vy)+cf)/dy);
-  cellMaxInverseDt      = fmax(cellMaxInverseDt, (fabs(vz)+cf)/dz);
+  Real cellMaxInverseDt = fmax((fabs(vx) + cf) / dx, (fabs(vy) + cf) / dy);
+  cellMaxInverseDt      = fmax(cellMaxInverseDt, (fabs(vz) + cf) / dz);
   cellMaxInverseDt      = fmax(cellMaxInverseDt, 0.0);
 
   return cellMaxInverseDt;
 }
 
-
-
 __global__ void Calc_dt_1D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int nx, Real dx)
 {
   Real max_dti = -DBL_MAX;
@@ -450,401 +446,378 @@ __global__ void Calc_dt_1D(Real *dev_conserved, Real *dev_dti, Real gamma, int n
   // but setting it to int results in some kind of silent over/underflow issue
   // even though we're not hitting those kinds of numbers. Setting it to type
   // uint or size_t fixes them
-  for(size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x)
-  {
+  for (size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) {
     // threads corresponding to real cells do the calculation
-    if (id > n_ghost - 1 && id < n_cells-n_ghost)
-    {
+    if (id > n_ghost - 1 && id < n_cells - n_ghost) {
       // start timestep calculation here
-      // every thread collects the conserved variables it needs from global memory
-      d  =  dev_conserved[            id];
+      // every thread collects the conserved variables it needs from global
+      // memory
+      d     = dev_conserved[id];
       d_inv = 1.0 / d;
-      vx =  dev_conserved[1*n_cells + id] * d_inv;
-      vy =  dev_conserved[2*n_cells + id] * d_inv;
-      vz =  dev_conserved[3*n_cells + id] * d_inv;
-      P  = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0);
-      P  = fmax(P, (Real) TINY_NUMBER);
-      // find the max wavespeed in that cell, use it to calculate the inverse timestep
-      cs = sqrt(d_inv * gamma * P);
-      max_dti = fmax(max_dti,(fabs(vx)+cs)/dx);
+      vx    = dev_conserved[1 * n_cells + id] * d_inv;
+      vy    = dev_conserved[2 * n_cells + id] * d_inv;
+      vz    = dev_conserved[3 * n_cells + id] * d_inv;
+      P     = (dev_conserved[4 * n_cells + id] - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0);
+      P     = fmax(P, (Real)TINY_NUMBER);
+      // find the max wavespeed in that cell, use it to calculate the inverse
+      // timestep
+      cs      = sqrt(d_inv * gamma * P);
+      max_dti = fmax(max_dti, (fabs(vx) + cs) / dx);
     }
   }
 
-  // do the block wide reduction (find the max inverse timestep in the block)
-  // then write it to that block's location in the dev_dti array
-  max_dti = reduction_utilities::blockReduceMax(max_dti);
-  if (threadIdx.x == 0) dev_dti[blockIdx.x] = max_dti;
+  // do the grid wide reduction (find the max inverse timestep in the grid)
+  reduction_utilities::gridReduceMax(max_dti, dev_dti);
 }
 
-
-
-__global__ void Calc_dt_2D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int nx, int ny, Real dx, Real dy)
+__global__ void Calc_dt_2D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int nx, int ny, Real dx,
+                           Real dy)
 {
   Real max_dti = -DBL_MAX;
 
   Real d, d_inv, vx, vy, vz, P, cs;
   int xid, yid, n_cells;
-  n_cells = nx*ny;
+  n_cells = nx * ny;
 
   // Grid stride loop to perform as much of the reduction as possible. The
   // fact that `id` has type `size_t` is important. I'm not totally sure why
   // but setting it to int results in some kind of silent over/underflow issue
   // even though we're not hitting those kinds of numbers. Setting it to type
   // uint or size_t fixes them
-  for(size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x)
-  {
+  for (size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) {
     // get a global thread ID
     yid = id / nx;
-    xid = id - yid*nx;
+    xid = id - yid * nx;
 
     // threads corresponding to real cells do the calculation
-    if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost)
-    {
-      // every thread collects the conserved variables it needs from global memory
-      d  =  dev_conserved[            id];
+    if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost) {
+      // every thread collects the conserved variables it needs from global
+      // memory
+      d     = dev_conserved[id];
       d_inv = 1.0 / d;
-      vx =  dev_conserved[1*n_cells + id] * d_inv;
-      vy =  dev_conserved[2*n_cells + id] * d_inv;
-      vz =  dev_conserved[3*n_cells + id] * d_inv;
-      P  = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0);
-      P  = fmax(P, (Real) 1.0e-20);
-      // find the max wavespeed in that cell, use it to calculate the inverse timestep
-      cs = sqrt(d_inv * gamma * P);
-      max_dti = fmax(max_dti,fmax((fabs(vx)+cs)/dx, (fabs(vy)+cs)/dy));
+      vx    = dev_conserved[1 * n_cells + id] * d_inv;
+      vy    = dev_conserved[2 * n_cells + id] * d_inv;
+      vz    = dev_conserved[3 * n_cells + id] * d_inv;
+      P     = (dev_conserved[4 * n_cells + id] - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0);
+      P     = fmax(P, (Real)1.0e-20);
+      // find the max wavespeed in that cell, use it to calculate the inverse
+      // timestep
+      cs      = sqrt(d_inv * gamma * P);
+      max_dti = fmax(max_dti, fmax((fabs(vx) + cs) / dx, (fabs(vy) + cs) / dy));
     }
   }
 
-  // do the block wide reduction (find the max inverse timestep in the block)
-  // then write it to that block's location in the dev_dti array
-  max_dti = reduction_utilities::blockReduceMax(max_dti);
-  if (threadIdx.x == 0) dev_dti[blockIdx.x] = max_dti;
+  // do the grid wide reduction (find the max inverse timestep in the grid)
+  reduction_utilities::gridReduceMax(max_dti, dev_dti);
 }
 
-
-__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int n_fields, int nx, int ny, int nz, Real dx, Real dy, Real dz)
+__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int n_fields, int nx, int ny,
+                           int nz, Real dx, Real dy, Real dz)
 {
   Real max_dti = -DBL_MAX;
 
   Real d, d_inv, vx, vy, vz, E;
-  #ifdef  MHD
-    Real avgBx, avgBy, avgBz;
-  #endif  //MHD
   int xid, yid, zid, n_cells;
 
-  n_cells = nx*ny*nz;
+  n_cells = nx * ny * nz;
 
   // Grid stride loop to perform as much of the reduction as possible. The
   // fact that `id` has type `size_t` is important. I'm not totally sure why
   // but setting it to int results in some kind of silent over/underflow issue
   // even though we're not hitting those kinds of numbers. Setting it to type
   // uint or size_t fixes them
-  for(size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x)
-  {
+  for (size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) {
     // get a global thread ID
     cuda_utilities::compute3DIndices(id, nx, ny, xid, yid, zid);
 
     // threads corresponding to real cells do the calculation
-    if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost)
-    {
-      // every thread collects the conserved variables it needs from global memory
-      d     = dev_conserved[            id];
+    if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 &&
+        zid < nz - n_ghost) {
+      // every thread collects the conserved variables it needs from global
+      // memory
+      d     = dev_conserved[id];
       d_inv = 1.0 / d;
-      vx    = dev_conserved[1*n_cells + id] * d_inv;
-      vy    = dev_conserved[2*n_cells + id] * d_inv;
-      vz    = dev_conserved[3*n_cells + id] * d_inv;
-      E     = dev_conserved[4*n_cells + id];
-      #ifdef  MHD
-        // Compute the cell centered magnetic field using a straight average of
-        // the faces
-        mhdUtils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny, avgBx, avgBy, avgBz);
-      #endif  //MHD
-
-      // Compute the maximum inverse crossing time in the cell
-      #ifdef  MHD
-        max_dti = fmax(max_dti,mhdInverseCrossingTime(E, d, d_inv, vx, vy, vz, avgBx, avgBy, avgBz, dx, dy, dz, gamma));
-      #else  // not MHD
-        max_dti = fmax(max_dti,hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma));
-      #endif  //MHD
-
+      vx    = dev_conserved[1 * n_cells + id] * d_inv;
+      vy    = dev_conserved[2 * n_cells + id] * d_inv;
+      vz    = dev_conserved[3 * n_cells + id] * d_inv;
+      E     = dev_conserved[4 * n_cells + id];
+
+// Compute the maximum inverse crossing time in the cell
+#ifdef MHD
+      // Compute the cell centered magnetic field using a straight average of
+      // the faces
+      auto const [avgBx, avgBy, avgBz] =
+          mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny);
+      max_dti = fmax(max_dti, mhdInverseCrossingTime(E, d, d_inv, vx, vy, vz, avgBx, avgBy, avgBz, dx, dy, dz, gamma));
+#else   // not MHD
+      max_dti = fmax(max_dti, hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma));
+#endif  // MHD
     }
   }
 
-  // do the block wide reduction (find the max inverse timestep in the block)
-  // then write it to that block's location in the dev_dti array
-  max_dti = reduction_utilities::blockReduceMax(max_dti);
-  if (threadIdx.x == 0) dev_dti[blockIdx.x] = max_dti;
+  // do the grid wide reduction (find the max inverse timestep in the grid)
+  reduction_utilities::gridReduceMax(max_dti, dev_dti);
 }
 
-Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma )
+Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz,
+                 Real gamma)
 {
-  // set values for GPU kernels
-  uint threadsPerBlock, numBlocks;
-  int ngrid = (nx*ny*nz + TPB - 1 )/TPB;
-  // reduction_utilities::reductionLaunchParams(numBlocks, threadsPerBlock); // Uncomment this if we fix the AtomicDouble bug - Alwin
-  threadsPerBlock = TPB;
-  numBlocks = ngrid;
-
-  Real* dev_dti = dev_dti_array;
+  // Allocate the device memory
+  cuda_utilities::DeviceVector<Real> static dev_dti(1);
 
+  // Set the device side inverse time step to the smallest possible double so
+  // that the reduction isn't using the maximum value of the previous iteration
+  dev_dti.assign(std::numeric_limits<double>::lowest());
 
   // compute dt and store in dev_dti
-  if (nx > 1 && ny == 1 && nz == 1) //1D
+  if (nx > 1 && ny == 1 && nz == 1)  // 1D
   {
-    hipLaunchKernelGGL(Calc_dt_1D, numBlocks, threadsPerBlock, 0, 0, dev_conserved, dev_dti, gamma, n_ghost, nx, dx);
-  }
-  else if (nx > 1 && ny > 1 && nz == 1) //2D
+    // set launch parameters for GPU kernels.
+    cuda_utilities::AutomaticLaunchParams static const launchParams(Calc_dt_1D);
+    hipLaunchKernelGGL(Calc_dt_1D, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, dev_conserved,
+                       dev_dti.data(), gamma, n_ghost, nx, dx);
+  } else if (nx > 1 && ny > 1 && nz == 1)  // 2D
   {
-    hipLaunchKernelGGL(Calc_dt_2D, numBlocks, threadsPerBlock, 0, 0, dev_conserved, dev_dti, gamma, n_ghost, nx, ny, dx, dy);
-  }
-  else if (nx > 1 && ny > 1 && nz > 1) //3D
+    // set launch parameters for GPU kernels.
+    cuda_utilities::AutomaticLaunchParams static const launchParams(Calc_dt_2D);
+    hipLaunchKernelGGL(Calc_dt_2D, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, dev_conserved,
+                       dev_dti.data(), gamma, n_ghost, nx, ny, dx, dy);
+  } else if (nx > 1 && ny > 1 && nz > 1)  // 3D
   {
-    hipLaunchKernelGGL(Calc_dt_3D, numBlocks, threadsPerBlock, 0, 0, dev_conserved, dev_dti, gamma, n_ghost, n_fields, nx, ny, nz, dx, dy, dz);
-  }
-  CudaCheckError();
-
-  Real max_dti=0;
-
-  /* Uncomment the below if we fix the AtomicDouble bug - Alwin
-  // copy device side max_dti to host side max_dti
-
-
-  CudaSafeCall( cudaMemcpy(&max_dti, dev_dti, sizeof(Real), cudaMemcpyDeviceToHost) );
-  cudaDeviceSynchronize();
-
-  return max_dti;
-  */
-
-  int dev_dti_length = numBlocks;
-  CudaSafeCall(cudaMemcpy(host_dti_array,dev_dti, dev_dti_length*sizeof(Real), cudaMemcpyDeviceToHost));
-  cudaDeviceSynchronize();
-
-  for (int i=0;i<dev_dti_length;i++){
-    max_dti = fmax(max_dti,host_dti_array[i]);
+    // set launch parameters for GPU kernels.
+    cuda_utilities::AutomaticLaunchParams static const launchParams(Calc_dt_3D);
+    hipLaunchKernelGGL(Calc_dt_3D, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, dev_conserved,
+                       dev_dti.data(), gamma, n_ghost, n_fields, nx, ny, nz, dx, dy, dz);
   }
+  GPU_Error_Check();
 
-  return max_dti;
+  // Note: dev_dti[0] is DeviceVector syntactic sugar for returning a value via
+  // cudaMemcpy
+  return dev_dti[0];
 }
 
-
 #ifdef AVERAGE_SLOW_CELLS
 
-void Average_Slow_Cells( Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma, Real max_dti_slow ){
-
+void Average_Slow_Cells(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy,
+                        Real dz, Real gamma, Real max_dti_slow)
+{
   // set values for GPU kernels
-  int n_cells = nx*ny*nz;
-  int ngrid = (n_cells + TPB - 1) / TPB;
+  int n_cells = nx * ny * nz;
+  int ngrid   = (n_cells + TPB - 1) / TPB;
   // number of blocks per 1D grid
   dim3 dim1dGrid(ngrid, 1, 1);
   //  number of threads per 1D block
   dim3 dim1dBlock(TPB, 1, 1);
 
-  if (nx > 1 && ny > 1 && nz > 1){ //3D
-    hipLaunchKernelGGL(Average_Slow_Cells_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dx, dy, dz, gamma, max_dti_slow );
+  if (nx > 1 && ny > 1 && nz > 1) {  // 3D
+    hipLaunchKernelGGL(Average_Slow_Cells_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields,
+                       dx, dy, dz, gamma, max_dti_slow);
   }
 }
 
-__global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma, Real max_dti_slow ){
-
+__global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx,
+                                      Real dy, Real dz, Real gamma, Real max_dti_slow)
+{
   int id, xid, yid, zid, n_cells;
   Real d, d_inv, vx, vy, vz, E, max_dti;
-  #ifdef  MHD
-    Real avgBx, avgBy, avgBz;
-  #endif  //MHD
+  Real speed, temp, P, cs;
 
   // get a global thread ID
-  id = threadIdx.x + blockIdx.x * blockDim.x;
-  n_cells = nx*ny*nz;
+  id      = threadIdx.x + blockIdx.x * blockDim.x;
+  n_cells = nx * ny * nz;
 
   cuda_utilities::compute3DIndices(id, nx, ny, xid, yid, zid);
 
-
   // threads corresponding to real cells do the calculation
-  if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost)
-  {
-    d  =  dev_conserved[            id];
+  if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 &&
+      zid < nz - n_ghost) {
+    d     = dev_conserved[id];
     d_inv = 1.0 / d;
-    vx =  dev_conserved[1*n_cells + id] * d_inv;
-    vy =  dev_conserved[2*n_cells + id] * d_inv;
-    vz =  dev_conserved[3*n_cells + id] * d_inv;
-    E  =  dev_conserved[4*n_cells + id];
-
-    #ifdef  MHD
-      // Compute the cell centered magnetic field using a straight average of the faces
-      mhdUtils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny, avgBx, avgBy, avgBz);
-    #endif  //MHD
+    vx    = dev_conserved[1 * n_cells + id] * d_inv;
+    vy    = dev_conserved[2 * n_cells + id] * d_inv;
+    vz    = dev_conserved[3 * n_cells + id] * d_inv;
+    E     = dev_conserved[4 * n_cells + id];
 
     // Compute the maximum inverse crossing time in the cell
-    #ifdef  MHD
-      max_dti = mhdInverseCrossingTime(E, d, d_inv, vx, vy, vz, avgBx, avgBy, avgBz, dx, dy, dz, gamma);
-    #else  // not MHD
-      max_dti = hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma);
-    #endif  //MHD
+    max_dti = hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma);
 
-    if (max_dti > max_dti_slow){
+    if (max_dti > max_dti_slow) {
+      speed = sqrt(vx * vx + vy * vy + vz * vz);
+      temp  = (gamma - 1) * (E - 0.5 * (speed * speed) * d) * ENERGY_UNIT / (d * DENSITY_UNIT / 0.6 / MP) / KB;
+      P     = (E - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0);
+      cs    = sqrt(d_inv * gamma * P) * VELOCITY_UNIT * 1e-5;
       // Average this cell
-      printf(" Average Slow Cell [ %d %d %d ] -> dt_cell=%f    dt_min=%f\n", xid, yid, zid, 1./max_dti,  1./max_dti_slow );
-      Average_Cell_All_Fields( xid, yid, zid, nx, ny, nz, n_cells, n_fields, dev_conserved );
+      kernel_printf(
+          " Average Slow Cell [ %d %d %d ] -> dt_cell=%f    dt_min=%f, n=%.3e, "
+          "T=%.3e, v=%.3e (%.3e, %.3e, %.3e), cs=%.3e\n",
+          xid, yid, zid, 1. / max_dti, 1. / max_dti_slow, dev_conserved[id] * DENSITY_UNIT / 0.6 / MP, temp,
+          speed * VELOCITY_UNIT * 1e-5, vx * VELOCITY_UNIT * 1e-5, vy * VELOCITY_UNIT * 1e-5, vz * VELOCITY_UNIT * 1e-5,
+          cs);
+      Average_Cell_All_Fields(xid, yid, zid, nx, ny, nz, n_cells, n_fields, gamma, dev_conserved);
     }
   }
 }
-#endif //AVERAGE_SLOW_CELLS
-
+#endif  // AVERAGE_SLOW_CELLS
 
 #ifdef DE
-__global__ void Partial_Update_Advected_Internal_Energy_1D( Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx, int n_ghost, Real dx, Real dt, Real gamma, int n_fields ){
-
+__global__ void Partial_Update_Advected_Internal_Energy_1D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx,
+                                                           int n_ghost, Real dx, Real dt, Real gamma, int n_fields)
+{
   int id, xid, n_cells;
   int imo, ipo;
   Real d, d_inv, vx, vy, vz;
   Real vx_imo, vx_ipo;
-  Real  P, E, E_kin, GE;
+  Real P, E, E_kin, GE;
 
-
-  Real dtodx = dt/dx;
-  n_cells = nx;
+  Real dtodx = dt / dx;
+  n_cells    = nx;
 
   // get a global thread ID
-  id = threadIdx.x + blockIdx.x * blockDim.x;
+  id  = threadIdx.x + blockIdx.x * blockDim.x;
   xid = id;
 
-
   // threads corresponding to real cells do the calculation
-  if (xid > n_ghost-1 && xid < nx-n_ghost)
-  {
-    d  =  dev_conserved[            id];
+  if (xid > n_ghost - 1 && xid < nx - n_ghost) {
+    d     = dev_conserved[id];
     d_inv = 1.0 / d;
-    vx =  dev_conserved[1*n_cells + id] * d_inv;
-    vy =  dev_conserved[2*n_cells + id] * d_inv;
-    vz =  dev_conserved[3*n_cells + id] * d_inv;
-    //PRESSURE_DE
-    E = dev_conserved[4*n_cells + id];
-    GE = dev_conserved[(n_fields-1)*n_cells + id];
-    E_kin = 0.5 * d * ( vx*vx + vy*vy + vz*vz );
-    P = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, GE, gamma );
-    P  = fmax(P, (Real) TINY_NUMBER);
-
-    imo = xid-1;
-    ipo = xid+1;
-
-    vx_imo = dev_conserved[1*n_cells + imo] / dev_conserved[imo];
-    vx_ipo = dev_conserved[1*n_cells + ipo] / dev_conserved[ipo];
+    vx    = dev_conserved[1 * n_cells + id] * d_inv;
+    vy    = dev_conserved[2 * n_cells + id] * d_inv;
+    vz    = dev_conserved[3 * n_cells + id] * d_inv;
+    // PRESSURE_DE
+    E     = dev_conserved[4 * n_cells + id];
+    GE    = dev_conserved[(n_fields - 1) * n_cells + id];
+    E_kin = 0.5 * d * (vx * vx + vy * vy + vz * vz);
+    P     = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, GE, gamma);
+    P     = fmax(P, (Real)TINY_NUMBER);
+
+    imo = xid - 1;
+    ipo = xid + 1;
+
+    vx_imo = dev_conserved[1 * n_cells + imo] / dev_conserved[imo];
+    vx_ipo = dev_conserved[1 * n_cells + ipo] / dev_conserved[ipo];
 
     // Use center values of neighbor cells for the divergence of velocity
-    dev_conserved[(n_fields-1)*n_cells + id] += 0.5*P*(dtodx*(vx_imo-vx_ipo));
-
+    dev_conserved[(n_fields - 1) * n_cells + id] += 0.5 * P * (dtodx * (vx_imo - vx_ipo));
   }
 }
 
-
-__global__ void Partial_Update_Advected_Internal_Energy_2D( Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, int nx, int ny, int n_ghost, Real dx, Real dy, Real dt, Real gamma, int n_fields ){
-
+__global__ void Partial_Update_Advected_Internal_Energy_2D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly,
+                                                           Real *Q_Ry, int nx, int ny, int n_ghost, Real dx, Real dy,
+                                                           Real dt, Real gamma, int n_fields)
+{
   int id, xid, yid, n_cells;
   int imo, jmo;
   int ipo, jpo;
   Real d, d_inv, vx, vy, vz;
   Real vx_imo, vx_ipo, vy_jmo, vy_jpo;
-  Real  P, E, E_kin, GE;
+  Real P, E, E_kin, GE;
 
-
-  Real dtodx = dt/dx;
-  Real dtody = dt/dy;
-  n_cells = nx*ny;
+  Real dtodx = dt / dx;
+  Real dtody = dt / dy;
+  n_cells    = nx * ny;
 
   // get a global thread ID
-  int blockId = blockIdx.x + blockIdx.y*gridDim.x;
-  id = threadIdx.x + blockId * blockDim.x;
-  yid = id / nx;
-  xid = id - yid*nx;
-
+  int blockId = blockIdx.x + blockIdx.y * gridDim.x;
+  id          = threadIdx.x + blockId * blockDim.x;
+  yid         = id / nx;
+  xid         = id - yid * nx;
 
   // threads corresponding to real cells do the calculation
-  if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost)
-  {
-    d  =  dev_conserved[            id];
+  if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost) {
+    d     = dev_conserved[id];
     d_inv = 1.0 / d;
-    vx =  dev_conserved[1*n_cells + id] * d_inv;
-    vy =  dev_conserved[2*n_cells + id] * d_inv;
-    vz =  dev_conserved[3*n_cells + id] * d_inv;
-    //PRESSURE_DE
-    E = dev_conserved[4*n_cells + id];
-    GE = dev_conserved[(n_fields-1)*n_cells + id];
-    E_kin = 0.5 * d * ( vx*vx + vy*vy + vz*vz );
-    P = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, GE, gamma );
-    P  = fmax(P, (Real) TINY_NUMBER);
-
-    imo = xid-1 + yid*nx;
-    ipo = xid+1 + yid*nx;
-    jmo = xid + (yid-1)*nx;
-    jpo = xid + (yid+1)*nx;
-
-    vx_imo = dev_conserved[1*n_cells + imo] / dev_conserved[imo];
-    vx_ipo = dev_conserved[1*n_cells + ipo] / dev_conserved[ipo];
-    vy_jmo = dev_conserved[2*n_cells + jmo] / dev_conserved[jmo];
-    vy_jpo = dev_conserved[2*n_cells + jpo] / dev_conserved[jpo];
+    vx    = dev_conserved[1 * n_cells + id] * d_inv;
+    vy    = dev_conserved[2 * n_cells + id] * d_inv;
+    vz    = dev_conserved[3 * n_cells + id] * d_inv;
+    // PRESSURE_DE
+    E     = dev_conserved[4 * n_cells + id];
+    GE    = dev_conserved[(n_fields - 1) * n_cells + id];
+    E_kin = 0.5 * d * (vx * vx + vy * vy + vz * vz);
+    P     = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, GE, gamma);
+    P     = fmax(P, (Real)TINY_NUMBER);
+
+    imo = xid - 1 + yid * nx;
+    ipo = xid + 1 + yid * nx;
+    jmo = xid + (yid - 1) * nx;
+    jpo = xid + (yid + 1) * nx;
+
+    vx_imo = dev_conserved[1 * n_cells + imo] / dev_conserved[imo];
+    vx_ipo = dev_conserved[1 * n_cells + ipo] / dev_conserved[ipo];
+    vy_jmo = dev_conserved[2 * n_cells + jmo] / dev_conserved[jmo];
+    vy_jpo = dev_conserved[2 * n_cells + jpo] / dev_conserved[jpo];
 
     // Use center values of neighbor cells for the divergence of velocity
-    dev_conserved[(n_fields-1)*n_cells + id] += 0.5*P*(dtodx*(vx_imo-vx_ipo) + dtody*(vy_jmo-vy_jpo));
-
+    dev_conserved[(n_fields - 1) * n_cells + id] += 0.5 * P * (dtodx * (vx_imo - vx_ipo) + dtody * (vy_jmo - vy_jpo));
   }
 }
 
-__global__ void Partial_Update_Advected_Internal_Energy_3D( Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, Real *Q_Lz, Real *Q_Rz, int nx, int ny, int nz,  int n_ghost, Real dx, Real dy, Real dz,  Real dt, Real gamma, int n_fields ){
-
+__global__ void Partial_Update_Advected_Internal_Energy_3D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly,
+                                                           Real *Q_Ry, Real *Q_Lz, Real *Q_Rz, int nx, int ny, int nz,
+                                                           int n_ghost, Real dx, Real dy, Real dz, Real dt, Real gamma,
+                                                           int n_fields)
+{
   int id, xid, yid, zid, n_cells;
   int imo, jmo, kmo;
   int ipo, jpo, kpo;
   Real d, d_inv, vx, vy, vz;
   Real vx_imo, vx_ipo, vy_jmo, vy_jpo, vz_kmo, vz_kpo;
-  Real  P, E, E_kin, GE;
+  Real P, E, E_kin, GE;
   // Real vx_L, vx_R, vy_L, vy_R, vz_L, vz_R;
 
-
-  Real dtodx = dt/dx;
-  Real dtody = dt/dy;
-  Real dtodz = dt/dz;
-  n_cells = nx*ny*nz;
+  Real dtodx = dt / dx;
+  Real dtody = dt / dy;
+  Real dtodz = dt / dz;
+  n_cells    = nx * ny * nz;
 
   // get a global thread ID
-  id = threadIdx.x + blockIdx.x * blockDim.x;
-  zid = id / (nx*ny);
-  yid = (id - zid*nx*ny) / nx;
-  xid = id - zid*nx*ny - yid*nx;
+  id  = threadIdx.x + blockIdx.x * blockDim.x;
+  zid = id / (nx * ny);
+  yid = (id - zid * nx * ny) / nx;
+  xid = id - zid * nx * ny - yid * nx;
 
   // threads corresponding to real cells do the calculation
-  if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost)
-  {
-    d  =  dev_conserved[            id];
+  if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 &&
+      zid < nz - n_ghost) {
+    d     = dev_conserved[id];
     d_inv = 1.0 / d;
-    vx =  dev_conserved[1*n_cells + id] * d_inv;
-    vy =  dev_conserved[2*n_cells + id] * d_inv;
-    vz =  dev_conserved[3*n_cells + id] * d_inv;
-    //PRESSURE_DE
-    E = dev_conserved[4*n_cells + id];
-    GE = dev_conserved[(n_fields-1)*n_cells + id];
-    E_kin = 0.5 * d * ( vx*vx + vy*vy + vz*vz );
-    P = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, GE, gamma );
-    P  = fmax(P, (Real) TINY_NUMBER);
-
-    imo = xid-1 + yid*nx + zid*nx*ny;
-    jmo = xid + (yid-1)*nx + zid*nx*ny;
-    kmo = xid + yid*nx + (zid-1)*nx*ny;
-
-    ipo = xid+1 + yid*nx + zid*nx*ny;
-    jpo = xid + (yid+1)*nx + zid*nx*ny;
-    kpo = xid + yid*nx + (zid+1)*nx*ny;
-
-    vx_imo = dev_conserved[1*n_cells + imo] / dev_conserved[imo];
-    vx_ipo = dev_conserved[1*n_cells + ipo] / dev_conserved[ipo];
-    vy_jmo = dev_conserved[2*n_cells + jmo] / dev_conserved[jmo];
-    vy_jpo = dev_conserved[2*n_cells + jpo] / dev_conserved[jpo];
-    vz_kmo = dev_conserved[3*n_cells + kmo] / dev_conserved[kmo];
-    vz_kpo = dev_conserved[3*n_cells + kpo] / dev_conserved[kpo];
+    vx    = dev_conserved[1 * n_cells + id] * d_inv;
+    vy    = dev_conserved[2 * n_cells + id] * d_inv;
+    vz    = dev_conserved[3 * n_cells + id] * d_inv;
+    // PRESSURE_DE
+    E     = dev_conserved[4 * n_cells + id];
+    GE    = dev_conserved[(n_fields - 1) * n_cells + id];
+    E_kin = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(d, vx, vy, vz);
+  #ifdef MHD
+    // Add the magnetic energy
+    auto magnetic_centered = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny);
+    E_kin += mhd::utils::computeMagneticEnergy(magnetic_centered.x, magnetic_centered.y, magnetic_centered.z);
+  #endif  // MHD
+    P = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, GE, gamma);
+    P = fmax(P, (Real)TINY_NUMBER);
+
+    imo = xid - 1 + yid * nx + zid * nx * ny;
+    jmo = xid + (yid - 1) * nx + zid * nx * ny;
+    kmo = xid + yid * nx + (zid - 1) * nx * ny;
+
+    ipo = xid + 1 + yid * nx + zid * nx * ny;
+    jpo = xid + (yid + 1) * nx + zid * nx * ny;
+    kpo = xid + yid * nx + (zid + 1) * nx * ny;
+
+    vx_imo = dev_conserved[1 * n_cells + imo] / dev_conserved[imo];
+    vx_ipo = dev_conserved[1 * n_cells + ipo] / dev_conserved[ipo];
+    vy_jmo = dev_conserved[2 * n_cells + jmo] / dev_conserved[jmo];
+    vy_jpo = dev_conserved[2 * n_cells + jpo] / dev_conserved[jpo];
+    vz_kmo = dev_conserved[3 * n_cells + kmo] / dev_conserved[kmo];
+    vz_kpo = dev_conserved[3 * n_cells + kpo] / dev_conserved[kpo];
 
     // Use center values of neighbor cells for the divergence of velocity
-    dev_conserved[(n_fields-1)*n_cells + id] += 0.5*P*(dtodx*(vx_imo-vx_ipo) + dtody*(vy_jmo-vy_jpo) + dtodz*(vz_kmo-vz_kpo));
+    dev_conserved[(n_fields - 1) * n_cells + id] +=
+        0.5 * P * (dtodx * (vx_imo - vx_ipo) + dtody * (vy_jmo - vy_jpo) + dtodz * (vz_kmo - vz_kpo));
 
-    // OPTION 2: Use the reconstructed velocities to compute the velocity gradient
-    //Use the reconstructed Velocities instead of neighbor cells centered values
+    // OPTION 2: Use the reconstructed velocities to compute the velocity
+    // gradient
+    // Use the reconstructed Velocities instead of neighbor cells centered
+    // values
     // vx_R = Q_Lx[1*n_cells + id]  / Q_Lx[id];
     // vx_L = Q_Rx[1*n_cells + imo] / Q_Rx[imo];
     // vy_R = Q_Ly[2*n_cells + id]  / Q_Ly[id];
@@ -852,183 +825,193 @@ __global__ void Partial_Update_Advected_Internal_Energy_3D( Real *dev_conserved,
     // vz_R = Q_Lz[3*n_cells + id]  / Q_Lz[id];
     // vz_L = Q_Rz[3*n_cells + kmo] / Q_Rz[kmo];
 
-    //Use the reconstructed Velocities instead of neighbor cells centered values
-    // dev_conserved[(n_fields-1)*n_cells + id] +=  P * ( dtodx * ( vx_L - vx_R ) + dtody * ( vy_L - vy_R ) + dtodz * ( vz_L - vz_R ) );
-
-
+    // Use the reconstructed Velocities instead of neighbor cells centered
+    // values
+    //  dev_conserved[(n_fields-1)*n_cells + id] +=  P * ( dtodx * ( vx_L - vx_R
+    //  ) + dtody * ( vy_L - vy_R ) + dtodz * ( vz_L - vz_R ) );
   }
 }
 
-
-__global__ void Select_Internal_Energy_1D( Real *dev_conserved, int nx, int n_ghost, int n_fields ){
-
+__global__ void Select_Internal_Energy_1D(Real *dev_conserved, int nx, int n_ghost, int n_fields)
+{
   int id, xid, n_cells;
   Real d, d_inv, vx, vy, vz, E, U_total, U_advected, U, Emax;
   int imo, ipo;
   n_cells = nx;
 
+  Real eta_1 = DE_ETA_1;
   Real eta_2 = DE_ETA_2;
 
   // get a global thread ID
-  id = threadIdx.x + blockIdx.x * blockDim.x;
+  id  = threadIdx.x + blockIdx.x * blockDim.x;
   xid = id;
 
-  imo = max(xid-1, n_ghost);
-  ipo = min(xid+1, nx-n_ghost-1);
-
+  imo = max(xid - 1, n_ghost);
+  ipo = min(xid + 1, nx - n_ghost - 1);
 
   // threads corresponding to real cells do the calculation
-  if (xid > n_ghost-1 && xid < nx-n_ghost)
-  {
+  if (xid > n_ghost - 1 && xid < nx - n_ghost) {
     // every thread collects the conserved variables it needs from global memory
-    d  =  dev_conserved[            id];
-    d_inv = 1.0 / d;
-    vx =  dev_conserved[1*n_cells + id] * d_inv;
-    vy =  dev_conserved[2*n_cells + id] * d_inv;
-    vz =  dev_conserved[3*n_cells + id] * d_inv;
-    E  =  dev_conserved[4*n_cells + id];
-    U_advected = dev_conserved[(n_fields-1)*n_cells + id];
-    U_total = E - 0.5*d*( vx*vx + vy*vy + vz*vz );
-
-    //find the max nearby total energy
-    Emax = fmax(dev_conserved[4*n_cells + imo], E);
-    Emax = fmax(Emax, dev_conserved[4*n_cells + ipo]);
-
-    if (U_total/Emax > eta_2 ) U = U_total;
-    else U = U_advected;
-
-    //Optional: Avoid Negative Internal  Energies
-    U = fmax(U, (Real) TINY_NUMBER);
+    d          = dev_conserved[id];
+    d_inv      = 1.0 / d;
+    vx         = dev_conserved[1 * n_cells + id] * d_inv;
+    vy         = dev_conserved[2 * n_cells + id] * d_inv;
+    vz         = dev_conserved[3 * n_cells + id] * d_inv;
+    E          = dev_conserved[4 * n_cells + id];
+    U_advected = dev_conserved[(n_fields - 1) * n_cells + id];
+    U_total    = E - 0.5 * d * (vx * vx + vy * vy + vz * vz);
+
+    // find the max nearby total energy
+    Emax = fmax(dev_conserved[4 * n_cells + imo], E);
+    Emax = fmax(Emax, dev_conserved[4 * n_cells + ipo]);
+
+    // We only use the "advected" internal energy if both:
+    // - the thermal energy divided by total energy is a small fraction (smaller than eta_1)
+    // - AND we aren't masking shock heating (details controlled by Emax & eta_2)
+    if ((U_total / E > eta_1) or (U_total / Emax > eta_2)) {
+      U = U_total;
+    } else {
+      U = U_advected;
+    }
 
-    //Write Selected internal energy to the GasEnergy array ONLY
-    //to avoid mixing updated and non-updated values of E
-    //since the Dual Energy condition depends on the neighbor cells
-    dev_conserved[(n_fields-1)*n_cells + id] = U;
+    // Optional: Avoid Negative Internal  Energies
+    U = fmax(U, (Real)TINY_NUMBER);
 
+    // Write Selected internal energy to the GasEnergy array ONLY
+    // to avoid mixing updated and non-updated values of E
+    // since the Dual Energy condition depends on the neighbor cells
+    dev_conserved[(n_fields - 1) * n_cells + id] = U;
   }
 }
 
-
-__global__ void Select_Internal_Energy_2D( Real *dev_conserved, int nx, int ny, int n_ghost, int n_fields ){
-
+__global__ void Select_Internal_Energy_2D(Real *dev_conserved, int nx, int ny, int n_ghost, int n_fields)
+{
   int id, xid, yid, n_cells;
   Real d, d_inv, vx, vy, vz, E, U_total, U_advected, U, Emax;
   int imo, ipo, jmo, jpo;
-  n_cells = nx*ny;
+  n_cells = nx * ny;
 
+  Real eta_1 = DE_ETA_1;
   Real eta_2 = DE_ETA_2;
 
   // get a global thread ID
-  int blockId = blockIdx.x + blockIdx.y*gridDim.x;
-  id = threadIdx.x + blockId * blockDim.x;
-  yid = id / nx;
-  xid = id - yid*nx;
-
-  imo = max(xid-1, n_ghost);
-  imo = imo + yid*nx;
-  ipo = min(xid+1, nx-n_ghost-1);
-  ipo = ipo + yid*nx;
-  jmo = max(yid-1, n_ghost);
-  jmo = xid + jmo*nx;
-  jpo = min(yid+1, ny-n_ghost-1);
-  jpo = xid + jpo*nx;
-
+  int blockId = blockIdx.x + blockIdx.y * gridDim.x;
+  id          = threadIdx.x + blockId * blockDim.x;
+  yid         = id / nx;
+  xid         = id - yid * nx;
+
+  imo = max(xid - 1, n_ghost);
+  imo = imo + yid * nx;
+  ipo = min(xid + 1, nx - n_ghost - 1);
+  ipo = ipo + yid * nx;
+  jmo = max(yid - 1, n_ghost);
+  jmo = xid + jmo * nx;
+  jpo = min(yid + 1, ny - n_ghost - 1);
+  jpo = xid + jpo * nx;
 
   // threads corresponding to real cells do the calculation
-  if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost)
-  {
+  if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost) {
     // every thread collects the conserved variables it needs from global memory
-    d  =  dev_conserved[            id];
-    d_inv = 1.0 / d;
-    vx =  dev_conserved[1*n_cells + id] * d_inv;
-    vy =  dev_conserved[2*n_cells + id] * d_inv;
-    vz =  dev_conserved[3*n_cells + id] * d_inv;
-    E  =  dev_conserved[4*n_cells + id];
-    U_advected = dev_conserved[(n_fields-1)*n_cells + id];
-    U_total = E - 0.5*d*( vx*vx + vy*vy + vz*vz );
-
-    //find the max nearby total energy
-    Emax = fmax(dev_conserved[4*n_cells + imo], E);
-    Emax = fmax(Emax, dev_conserved[4*n_cells + ipo]);
-    Emax = fmax(Emax, dev_conserved[4*n_cells + jmo]);
-    Emax = fmax(Emax, dev_conserved[4*n_cells + jpo]);
-
-    if (U_total/Emax > eta_2 ) U = U_total;
-    else U = U_advected;
-
-    //Optional: Avoid Negative Internal  Energies
-    U = fmax(U, (Real) TINY_NUMBER);
+    d          = dev_conserved[id];
+    d_inv      = 1.0 / d;
+    vx         = dev_conserved[1 * n_cells + id] * d_inv;
+    vy         = dev_conserved[2 * n_cells + id] * d_inv;
+    vz         = dev_conserved[3 * n_cells + id] * d_inv;
+    E          = dev_conserved[4 * n_cells + id];
+    U_advected = dev_conserved[(n_fields - 1) * n_cells + id];
+    U_total    = E - 0.5 * d * (vx * vx + vy * vy + vz * vz);
+
+    // find the max nearby total energy
+    Emax = fmax(dev_conserved[4 * n_cells + imo], E);
+    Emax = fmax(Emax, dev_conserved[4 * n_cells + ipo]);
+    Emax = fmax(Emax, dev_conserved[4 * n_cells + jmo]);
+    Emax = fmax(Emax, dev_conserved[4 * n_cells + jpo]);
+
+    // We only use the "advected" internal energy if both:
+    // - the thermal energy divided by total energy is a small fraction (smaller than eta_1)
+    // - AND we aren't masking shock heating (details controlled by Emax & eta_2)
+    if ((U_total / E > eta_1) or (U_total / Emax > eta_2)) {
+      U = U_total;
+    } else {
+      U = U_advected;
+    }
 
-    //Write Selected internal energy to the GasEnergy array ONLY
-    //to avoid mixing updated and non-updated values of E
-    //since the Dual Energy condition depends on the neighbour cells
-    dev_conserved[(n_fields-1)*n_cells + id] = U;
+    // Optional: Avoid Negative Internal  Energies
+    U = fmax(U, (Real)TINY_NUMBER);
 
+    // Write Selected internal energy to the GasEnergy array ONLY
+    // to avoid mixing updated and non-updated values of E
+    // since the Dual Energy condition depends on the neighbour cells
+    dev_conserved[(n_fields - 1) * n_cells + id] = U;
   }
 }
 
-
-__global__ void Select_Internal_Energy_3D( Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields ){
-
+__global__ void Select_Internal_Energy_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields)
+{
   int id, xid, yid, zid, n_cells;
   Real d, d_inv, vx, vy, vz, E, U_total, U_advected, U, Emax;
   int imo, ipo, jmo, jpo, kmo, kpo;
-  n_cells = nx*ny*nz;
+  n_cells = nx * ny * nz;
 
+  Real eta_1 = DE_ETA_1;
   Real eta_2 = DE_ETA_2;
 
   // get a global thread ID
-  id = threadIdx.x + blockIdx.x * blockDim.x;
-  zid = id / (nx*ny);
-  yid = (id - zid*nx*ny) / nx;
-  xid = id - zid*nx*ny - yid*nx;
-
-  imo = max(xid-1, n_ghost);
-  imo = imo + yid*nx + zid*nx*ny;
-  ipo = min(xid+1, nx-n_ghost-1);
-  ipo = ipo + yid*nx + zid*nx*ny;
-  jmo = max(yid-1, n_ghost);
-  jmo = xid + jmo*nx + zid*nx*ny;
-  jpo = min(yid+1, ny-n_ghost-1);
-  jpo = xid + jpo*nx + zid*nx*ny;
-  kmo = max(zid-1, n_ghost);
-  kmo = xid + yid*nx + kmo*nx*ny;
-  kpo = min(zid+1, nz-n_ghost-1);
-  kpo = xid + yid*nx + kpo*nx*ny;
-
+  id  = threadIdx.x + blockIdx.x * blockDim.x;
+  zid = id / (nx * ny);
+  yid = (id - zid * nx * ny) / nx;
+  xid = id - zid * nx * ny - yid * nx;
+
+  imo = max(xid - 1, n_ghost);
+  imo = imo + yid * nx + zid * nx * ny;
+  ipo = min(xid + 1, nx - n_ghost - 1);
+  ipo = ipo + yid * nx + zid * nx * ny;
+  jmo = max(yid - 1, n_ghost);
+  jmo = xid + jmo * nx + zid * nx * ny;
+  jpo = min(yid + 1, ny - n_ghost - 1);
+  jpo = xid + jpo * nx + zid * nx * ny;
+  kmo = max(zid - 1, n_ghost);
+  kmo = xid + yid * nx + kmo * nx * ny;
+  kpo = min(zid + 1, nz - n_ghost - 1);
+  kpo = xid + yid * nx + kpo * nx * ny;
 
   // threads corresponding to real cells do the calculation
-  if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost)
-  {
+  if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 &&
+      zid < nz - n_ghost) {
     // every thread collects the conserved variables it needs from global memory
-    d  =  dev_conserved[            id];
-    d_inv = 1.0 / d;
-    vx =  dev_conserved[1*n_cells + id] * d_inv;
-    vy =  dev_conserved[2*n_cells + id] * d_inv;
-    vz =  dev_conserved[3*n_cells + id] * d_inv;
-    E  =  dev_conserved[4*n_cells + id];
-    U_advected = dev_conserved[(n_fields-1)*n_cells + id];
-    U_total = E - 0.5*d*( vx*vx + vy*vy + vz*vz );
-
-    //find the max nearby total energy
-    Emax = fmax(dev_conserved[4*n_cells + imo], E);
-    Emax = fmax(Emax, dev_conserved[4*n_cells + ipo]);
-    Emax = fmax(Emax, dev_conserved[4*n_cells + jmo]);
-    Emax = fmax(Emax, dev_conserved[4*n_cells + jpo]);
-    Emax = fmax(Emax, dev_conserved[4*n_cells + kmo]);
-    Emax = fmax(Emax, dev_conserved[4*n_cells + kpo]);
-
-    if (U_total/Emax > eta_2 ) U = U_total;
-    else U = U_advected;
-
-    //Optional: Avoid Negative Internal  Energies
-    U = fmax(U, (Real) TINY_NUMBER);
+    d          = dev_conserved[id];
+    d_inv      = 1.0 / d;
+    vx         = dev_conserved[1 * n_cells + id] * d_inv;
+    vy         = dev_conserved[2 * n_cells + id] * d_inv;
+    vz         = dev_conserved[3 * n_cells + id] * d_inv;
+    E          = dev_conserved[4 * n_cells + id];
+    U_advected = dev_conserved[(n_fields - 1) * n_cells + id];
+    U_total    = E - 0.5 * d * (vx * vx + vy * vy + vz * vz);
+
+    // find the max nearby total energy
+    Emax = fmax(dev_conserved[4 * n_cells + imo], E);
+    Emax = fmax(Emax, dev_conserved[4 * n_cells + ipo]);
+    Emax = fmax(Emax, dev_conserved[4 * n_cells + jmo]);
+    Emax = fmax(Emax, dev_conserved[4 * n_cells + jpo]);
+    Emax = fmax(Emax, dev_conserved[4 * n_cells + kmo]);
+    Emax = fmax(Emax, dev_conserved[4 * n_cells + kpo]);
+
+    // We only use the "advected" internal energy if both:
+    // - the thermal energy divided by total energy is a small fraction (smaller than eta_1)
+    // - AND we aren't masking shock heating (details controlled by Emax & eta_2)
+    if ((U_total / E > eta_1) or (U_total / Emax > eta_2)) {
+      U = U_total;
+    } else {
+      U = U_advected;
+    }
 
-    //Write Selected internal energy to the GasEnergy array ONLY
-    //to avoid mixing updated and non-updated values of E
-    //since the Dual Energy condition depends on the neighbour cells
-    dev_conserved[(n_fields-1)*n_cells + id] = U;
+    // Optional: Avoid Negative Internal  Energies
+    U = fmax(U, (Real)TINY_NUMBER);
 
+    // Write Selected internal energy to the GasEnergy array ONLY
+    // to avoid mixing updated and non-updated values of E
+    // since the Dual Energy condition depends on the neighbour cells
+    dev_conserved[(n_fields - 1) * n_cells + id] = U;
   }
 }
 
@@ -1039,178 +1022,286 @@ __global__ void Sync_Energies_1D(Real *dev_conserved, int nx, int n_ghost, Real
   n_cells = nx;
 
   // get a global thread ID
-  id = threadIdx.x + blockIdx.x * blockDim.x;
+  id  = threadIdx.x + blockIdx.x * blockDim.x;
   xid = id;
 
-
   // threads corresponding to real cells do the calculation
-  if (xid > n_ghost-1 && xid < nx-n_ghost)
-  {
+  if (xid > n_ghost - 1 && xid < nx - n_ghost) {
     // every thread collects the conserved variables it needs from global memory
-    d  =  dev_conserved[            id];
+    d     = dev_conserved[id];
     d_inv = 1.0 / d;
-    vx =  dev_conserved[1*n_cells + id] * d_inv;
-    vy =  dev_conserved[2*n_cells + id] * d_inv;
-    vz =  dev_conserved[3*n_cells + id] * d_inv;
-    U = dev_conserved[(n_fields-1)*n_cells + id];
+    vx    = dev_conserved[1 * n_cells + id] * d_inv;
+    vy    = dev_conserved[2 * n_cells + id] * d_inv;
+    vz    = dev_conserved[3 * n_cells + id] * d_inv;
+    U     = dev_conserved[(n_fields - 1) * n_cells + id];
 
-    //Use the previously selected Internal Energy to update the total energy
-    dev_conserved[4*n_cells + id] = 0.5*d*( vx*vx + vy*vy + vz*vz ) + U;
+    // Use the previously selected Internal Energy to update the total energy
+    dev_conserved[4 * n_cells + id] = 0.5 * d * (vx * vx + vy * vy + vz * vz) + U;
   }
-
 }
 
-
 __global__ void Sync_Energies_2D(Real *dev_conserved, int nx, int ny, int n_ghost, Real gamma, int n_fields)
 {
   int id, xid, yid, n_cells;
   Real d, d_inv, vx, vy, vz, U;
-  n_cells = nx*ny;
+  n_cells = nx * ny;
 
   // get a global thread ID
-  int blockId = blockIdx.x + blockIdx.y*gridDim.x;
-  id = threadIdx.x + blockId * blockDim.x;
-  yid = id / nx;
-  xid = id - yid*nx;
-
+  int blockId = blockIdx.x + blockIdx.y * gridDim.x;
+  id          = threadIdx.x + blockId * blockDim.x;
+  yid         = id / nx;
+  xid         = id - yid * nx;
 
   // threads corresponding to real cells do the calculation
-  if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost)
-  {
+  if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost) {
     // every thread collects the conserved variables it needs from global memory
-    d  =  dev_conserved[            id];
+    d     = dev_conserved[id];
     d_inv = 1.0 / d;
-    vx =  dev_conserved[1*n_cells + id] * d_inv;
-    vy =  dev_conserved[2*n_cells + id] * d_inv;
-    vz =  dev_conserved[3*n_cells + id] * d_inv;
-    U = dev_conserved[(n_fields-1)*n_cells + id];
+    vx    = dev_conserved[1 * n_cells + id] * d_inv;
+    vy    = dev_conserved[2 * n_cells + id] * d_inv;
+    vz    = dev_conserved[3 * n_cells + id] * d_inv;
+    U     = dev_conserved[(n_fields - 1) * n_cells + id];
 
-    //Use the previously selected Internal Energy to update the total energy
-    dev_conserved[4*n_cells + id] = 0.5*d*( vx*vx + vy*vy + vz*vz ) + U;
+    // Use the previously selected Internal Energy to update the total energy
+    dev_conserved[4 * n_cells + id] = 0.5 * d * (vx * vx + vy * vy + vz * vz) + U;
   }
-
 }
 
-
 __global__ void Sync_Energies_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, Real gamma, int n_fields)
 {
-  //Called in a separate kernel to avoid interfering with energy selection in Select_Internal_Energy
+  // Called in a separate kernel to avoid interfering with energy selection in
+  // Select_Internal_Energy
 
   int id, xid, yid, zid, n_cells;
   Real d, d_inv, vx, vy, vz, U;
-  n_cells = nx*ny*nz;
+  n_cells = nx * ny * nz;
 
   // get a global thread ID
-  id = threadIdx.x + blockIdx.x * blockDim.x;
-  zid = id / (nx*ny);
-  yid = (id - zid*nx*ny) / nx;
-  xid = id - zid*nx*ny - yid*nx;
+  id  = threadIdx.x + blockIdx.x * blockDim.x;
+  zid = id / (nx * ny);
+  yid = (id - zid * nx * ny) / nx;
+  xid = id - zid * nx * ny - yid * nx;
 
   // threads corresponding to real cells do the calculation
-  if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost)
-  {
+  if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 &&
+      zid < nz - n_ghost) {
     // every thread collects the conserved variables it needs from global memory
-    d  =  dev_conserved[            id];
+    d     = dev_conserved[id];
     d_inv = 1.0 / d;
-    vx =  dev_conserved[1*n_cells + id] * d_inv;
-    vy =  dev_conserved[2*n_cells + id] * d_inv;
-    vz =  dev_conserved[3*n_cells + id] * d_inv;
-    U = dev_conserved[(n_fields-1)*n_cells + id];
+    vx    = dev_conserved[1 * n_cells + id] * d_inv;
+    vy    = dev_conserved[2 * n_cells + id] * d_inv;
+    vz    = dev_conserved[3 * n_cells + id] * d_inv;
+    U     = dev_conserved[(n_fields - 1) * n_cells + id];
 
-    //Use the previously selected Internal Energy to update the total energy
-    dev_conserved[4*n_cells + id] = 0.5*d*( vx*vx + vy*vy + vz*vz ) + U;
+    // Use the previously selected Internal Energy to update the total energy
+    dev_conserved[4 * n_cells + id] = 0.5 * d * (vx * vx + vy * vy + vz * vz) + U;
   }
 }
 
+#endif  // DE
 
-#endif //DE
+void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real U_floor)
+{
+  // set values for GPU kernels
+  int n_cells = nx * ny * nz;
+  int ngrid   = (n_cells + TPB - 1) / TPB;
+  // number of blocks per 1D grid
+  dim3 dim1dGrid(ngrid, 1, 1);
+  //  number of threads per 1D block
+  dim3 dim1dBlock(TPB, 1, 1);
+
+  hipLaunchKernelGGL(Temperature_Floor_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost,
+                     n_fields, U_floor);
+}
 
-#ifdef TEMPERATURE_FLOOR
-__global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields,  Real U_floor )
+__global__ void Temperature_Floor_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields,
+                                         Real U_floor)
 {
   int id, xid, yid, zid, n_cells;
   Real d, d_inv, vx, vy, vz, E, Ekin, U;
-  n_cells = nx*ny*nz;
+  n_cells = nx * ny * nz;
 
   // get a global thread ID
-  id = threadIdx.x + blockIdx.x * blockDim.x;
-  zid = id / (nx*ny);
-  yid = (id - zid*nx*ny) / nx;
-  xid = id - zid*nx*ny - yid*nx;
-
+  id  = threadIdx.x + blockIdx.x * blockDim.x;
+  zid = id / (nx * ny);
+  yid = (id - zid * nx * ny) / nx;
+  xid = id - zid * nx * ny - yid * nx;
 
   // threads corresponding to real cells do the calculation
-  if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost)
-  {
-    d  =  dev_conserved[            id];
+  if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 &&
+      zid < nz - n_ghost) {
+    d     = dev_conserved[id];
     d_inv = 1.0 / d;
-    vx =  dev_conserved[1*n_cells + id] * d_inv;
-    vy =  dev_conserved[2*n_cells + id] * d_inv;
-    vz =  dev_conserved[3*n_cells + id] * d_inv;
-    E  =  dev_conserved[4*n_cells + id];
-    Ekin = 0.5 * d * ( vx*vx + vy*vy + vz*vz );
-
-    U = ( E - Ekin ) / d;
-    if ( U < U_floor ) dev_conserved[4*n_cells + id] = Ekin + d*U_floor;
+    vx    = dev_conserved[1 * n_cells + id] * d_inv;
+    vy    = dev_conserved[2 * n_cells + id] * d_inv;
+    vz    = dev_conserved[3 * n_cells + id] * d_inv;
+    E     = dev_conserved[4 * n_cells + id];
+    Ekin  = 0.5 * d * (vx * vx + vy * vy + vz * vz);
+
+    U = (E - Ekin) / d;
+    if (U < U_floor) {
+      dev_conserved[4 * n_cells + id] = Ekin + d * U_floor;
+    }
 
-    #ifdef DE
-    U = dev_conserved[(n_fields-1)*n_cells + id] / d ;
-    if ( U < U_floor ) dev_conserved[(n_fields-1)*n_cells + id] = d*U_floor ;
-    #endif
+#ifdef DE
+    U = dev_conserved[(n_fields - 1) * n_cells + id] / d;
+    if (U < U_floor) {
+      dev_conserved[(n_fields - 1) * n_cells + id] = d * U_floor;
+    }
+#endif
   }
 }
-#endif //TEMPERATURE_FLOOR
-
 
-__device__ Real Average_Cell_Single_Field( int field_indx, int i, int j, int k, int nx, int ny, int nz, int ncells, Real *conserved ){
+__device__ Real Average_Cell_Single_Field(int field_indx, int i, int j, int k, int nx, int ny, int nz, int ncells,
+                                          Real *conserved)
+{
   Real v_l, v_r, v_d, v_u, v_b, v_t, v_avrg;
   int id;
 
-  id = (i-1) + (j)*nx + (k)*nx*ny;
-  v_l = conserved[ field_indx*ncells + id ];
-  id = (i+1) + (j)*nx + (k)*nx*ny;
-  v_r = conserved[ field_indx*ncells + id ];
-  id = (i) + (j-1)*nx + (k)*nx*ny;
-  v_d = conserved[ field_indx*ncells + id ];
-  id = (i) + (j+1)*nx + (k)*nx*ny;
-  v_u = conserved[ field_indx*ncells + id ];
-  id = (i) + (j)*nx + (k-1)*nx*ny;
-  v_b = conserved[ field_indx*ncells + id ];
-  id = (i) + (j)*nx + (k+1)*nx*ny;
-  v_t = conserved[ field_indx*ncells + id ];
-  v_avrg = ( v_l + v_r + v_d + v_u + v_b + v_t ) / 6;
-  id = (i) + (j)*nx + (k)*nx*ny;
-  conserved[ field_indx*ncells + id ] = v_avrg;
+  id                                  = (i - 1) + (j)*nx + (k)*nx * ny;
+  v_l                                 = conserved[field_indx * ncells + id];
+  id                                  = (i + 1) + (j)*nx + (k)*nx * ny;
+  v_r                                 = conserved[field_indx * ncells + id];
+  id                                  = (i) + (j - 1) * nx + (k)*nx * ny;
+  v_d                                 = conserved[field_indx * ncells + id];
+  id                                  = (i) + (j + 1) * nx + (k)*nx * ny;
+  v_u                                 = conserved[field_indx * ncells + id];
+  id                                  = (i) + (j)*nx + (k - 1) * nx * ny;
+  v_b                                 = conserved[field_indx * ncells + id];
+  id                                  = (i) + (j)*nx + (k + 1) * nx * ny;
+  v_t                                 = conserved[field_indx * ncells + id];
+  v_avrg                              = (v_l + v_r + v_d + v_u + v_b + v_t) / 6;
+  id                                  = (i) + (j)*nx + (k)*nx * ny;
+  conserved[field_indx * ncells + id] = v_avrg;
   return v_avrg;
+}
+
+__device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int nz, int ncells, int n_fields,
+                                        Real gamma, Real *conserved)
+{
+  int id = i + (j)*nx + (k)*nx * ny;
+
+  Real d, mx, my, mz, E, P;
+  d  = conserved[grid_enum::density * ncells + id];
+  mx = conserved[grid_enum::momentum_x * ncells + id];
+  my = conserved[grid_enum::momentum_y * ncells + id];
+  mz = conserved[grid_enum::momentum_z * ncells + id];
+  E  = conserved[grid_enum::Energy * ncells + id];
+  P  = (E - (0.5 / d) * (mx * mx + my * my + mz * mz)) * (gamma - 1.0);
+
+  printf("%3d %3d %3d BC: d: %e  E:%e  P:%e  vx:%e  vy:%e  vz:%e\n", i, j, k, d, E, P, mx / d, my / d, mz / d);
+
+  int idn;
+  int N = 0;
+  Real d_av, vx_av, vy_av, vz_av, P_av;
+  d_av = vx_av = vy_av = vz_av = P_av = 0.0;
+#ifdef SCALAR
+  Real scalar[NSCALARS], scalar_av[NSCALARS];
+  for (int n = 0; n < NSCALARS; n++) {  // NOLINT
+    scalar_av[n] = 0.0;
+  }
+#endif
+
+  for (int kk = k - 1; kk <= k + 1; kk++) {
+    for (int jj = j - 1; jj <= j + 1; jj++) {
+      for (int ii = i - 1; ii <= i + 1; ii++) {
+        idn = ii + jj * nx + kk * nx * ny;
+        d   = conserved[grid_enum::density * ncells + idn];
+        mx  = conserved[grid_enum::momentum_x * ncells + idn];
+        my  = conserved[grid_enum::momentum_y * ncells + idn];
+        mz  = conserved[grid_enum::momentum_z * ncells + idn];
+        P   = (conserved[grid_enum::Energy * ncells + idn] - (0.5 / d) * (mx * mx + my * my + mz * mz)) * (gamma - 1.0);
+#ifdef SCALAR
+        for (int n = 0; n < NSCALARS; n++) {  // NOLINT
+          scalar[n] = conserved[grid_enum::scalar * ncells + idn];
+        }
+#endif
+        if (d > 0.0 && P > 0.0) {
+          d_av += d;
+          vx_av += mx;
+          vy_av += my;
+          vz_av += mz;
+          P_av += P / (gamma - 1.0);
+#ifdef SCALAR
+          for (int n = 0; n < NSCALARS; n++) {  // NOLINT
+            scalar_av[n] += scalar[n];
+          }
+#endif
+          N++;
+        }
+      }
+    }
+  }
 
+  P_av  = P_av / N;
+  vx_av = vx_av / d_av;
+  vy_av = vy_av / d_av;
+  vz_av = vz_av / d_av;
+#ifdef SCALAR
+  for (int n = 0; n < NSCALARS; n++) {  // NOLINT
+    scalar_av[n] = scalar_av[n] / d_av;
+  }
+#endif
+  d_av = d_av / N;
+
+  // replace cell values with new averaged values
+  conserved[id + ncells * grid_enum::density]    = d_av;
+  conserved[id + ncells * grid_enum::momentum_x] = d_av * vx_av;
+  conserved[id + ncells * grid_enum::momentum_y] = d_av * vy_av;
+  conserved[id + ncells * grid_enum::momentum_z] = d_av * vz_av;
+  conserved[id + ncells * grid_enum::Energy] =
+      P_av / (gamma - 1.0) + 0.5 * d_av * (vx_av * vx_av + vy_av * vy_av + vz_av * vz_av);
+#ifdef DE
+  conserved[id + ncells * grid_enum::GasEnergy] = P_av / (gamma - 1.0);
+#endif
+#ifdef SCALAR
+  for (int n = 0; n < NSCALARS; n++) {  // NOLINT
+    conserved[id + ncells * grid_enum::scalar] = d_av * scalar_av[n];
+  }
+#endif
+
+  d = d_av;
+  E = P_av / (gamma - 1.0) + 0.5 * d_av * (vx_av * vx_av + vy_av * vy_av + vz_av * vz_av);
+  P = P_av;
+
+  printf("%3d %3d %3d FC: d: %e  E:%e  P:%e  vx:%e  vy:%e  vz:%e\n", i, j, k, d, E, P, vx_av, vy_av, vz_av);
 }
 
-__device__ void Average_Cell_All_Fields( int i, int j, int k, int nx, int ny, int nz, int ncells, int n_fields, Real *conserved ){
-
-  // Average Density
-  Average_Cell_Single_Field( 0, i, j, k, nx, ny, nz, ncells, conserved );
-  // Average Momentum_x
-  Average_Cell_Single_Field( 1, i, j, k, nx, ny, nz, ncells, conserved );
-  // Average Momentum_y
-  Average_Cell_Single_Field( 2, i, j, k, nx, ny, nz, ncells, conserved );
-  // Average Momentum_z
-  Average_Cell_Single_Field( 3, i, j, k, nx, ny, nz, ncells, conserved );
-  // Average Energy
-  Average_Cell_Single_Field( 4, i, j, k, nx, ny, nz, ncells, conserved );
-  #ifdef  MHD
-    // Average MHD
-    Average_Cell_Single_Field( 5+NSCALARS, i,   j,   k,   nx, ny, nz, ncells, conserved );
-    Average_Cell_Single_Field( 6+NSCALARS, i,   j,   k,   nx, ny, nz, ncells, conserved );
-    Average_Cell_Single_Field( 7+NSCALARS, i,   j,   k,   nx, ny, nz, ncells, conserved );
-    Average_Cell_Single_Field( 5+NSCALARS, i-1, j,   k,   nx, ny, nz, ncells, conserved );
-    Average_Cell_Single_Field( 6+NSCALARS, i,   j-1, k,   nx, ny, nz, ncells, conserved );
-    Average_Cell_Single_Field( 7+NSCALARS, i,   j,   k-1, nx, ny, nz, ncells, conserved );
-  #endif  //MHD
-  #ifdef DE
-  // Average GasEnergy
-  Average_Cell_Single_Field( n_fields-1, i, j, k, nx, ny, nz, ncells, conserved );
-  #endif  //DE
+void Apply_Scalar_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, Real scalar_floor)
+{
+  // set values for GPU kernels
+  int n_cells = nx * ny * nz;
+  int ngrid   = (n_cells + TPB - 1) / TPB;
+  // number of blocks per 1D grid
+  dim3 dim1dGrid(ngrid, 1, 1);
+  //  number of threads per 1D block
+  dim3 dim1dBlock(TPB, 1, 1);
+
+  hipLaunchKernelGGL(Scalar_Floor_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, field_num,
+                     scalar_floor);
 }
 
+__global__ void Scalar_Floor_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num,
+                                    Real scalar_floor)
+{
+  int id, xid, yid, zid, n_cells;
+  Real scalar;  // variable to store the value of the scalar before a floor is applied
+  n_cells = nx * ny * nz;
 
-#endif //CUDA
+  // get a global thread ID
+  id  = threadIdx.x + blockIdx.x * blockDim.x;
+  zid = id / (nx * ny);
+  yid = (id - zid * nx * ny) / nx;
+  xid = id - zid * nx * ny - yid * nx;
+
+  // threads corresponding to real cells do the calculation
+  if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 &&
+      zid < nz - n_ghost) {
+    scalar = dev_conserved[id + n_cells * field_num];
+
+    if (scalar < scalar_floor) {
+      // printf("###Thread scalar change  %f -> %f \n", scalar, scalar_floor);
+      dev_conserved[id + n_cells * field_num] = scalar_floor;
+    }
+  }
+}
diff --git a/src/hydro/hydro_cuda.h b/src/hydro/hydro_cuda.h
index c801882d3..f167d0745 100644
--- a/src/hydro/hydro_cuda.h
+++ b/src/hydro/hydro_cuda.h
@@ -1,21 +1,25 @@
 /*! \file hydro_cuda.h
  *  \brief Declarations of functions used in all cuda integration algorithms. */
 
-#ifdef CUDA
 #ifndef HYDRO_CUDA_H
 #define HYDRO_CUDA_H
 
 #include "../global/global.h"
 #include "../utils/mhd_utilities.h"
 
-__global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, int n_cells, int x_off, int n_ghost, Real dx, Real xbound, Real dt, Real gamma, int n_fields);
+__global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, int n_cells, int x_off, int n_ghost,
+                                              Real dx, Real xbound, Real dt, Real gamma, int n_fields, int custom_grav);
 
+__global__ void Update_Conserved_Variables_2D(Real *dev_conserved, Real *dev_F_x, Real *dev_F_y, int nx, int ny,
+                                              int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound,
+                                              Real ybound, Real dt, Real gamma, int n_fields, int custom_grav);
 
-__global__ void Update_Conserved_Variables_2D(Real *dev_conserved, Real *dev_F_x, Real *dev_F_y, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real dt, Real gamma, int n_fields);
-
-
-__global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, Real *Q_Lz, Real *Q_Rz, Real *dev_F_x, Real *dev_F_y,  Real *dev_F_z, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, Real gamma, int n_fields, Real density_floor, Real *dev_potential );
-
+__global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry,
+                                              Real *Q_Lz, Real *Q_Rz, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z,
+                                              int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost,
+                                              Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt,
+                                              Real gamma, int n_fields, int custom_grav, Real density_floor,
+                                              Real *dev_potential);
 
 /*!
  * \brief Determine the maximum inverse crossing time in a specific cell
@@ -32,16 +36,9 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R
  * \param[in] gamma The adiabatic index
  * \return Real The maximum inverse crossing time in the cell
  */
-__device__ __host__ Real hydroInverseCrossingTime(Real const &E,
-                                                  Real const &d,
-                                                  Real const &d_inv,
-                                                  Real const &vx,
-                                                  Real const &vy,
-                                                  Real const &vz,
-                                                  Real const &dx,
-                                                  Real const &dy,
-                                                  Real const &dz,
-                                                  Real const &gamma);
+__device__ __host__ Real hydroInverseCrossingTime(Real const &E, Real const &d, Real const &d_inv, Real const &vx,
+                                                  Real const &vy, Real const &vz, Real const &dx, Real const &dy,
+                                                  Real const &dz, Real const &gamma);
 
 /*!
  * \brief Determine the maximum inverse crossing time in a specific cell
@@ -61,59 +58,64 @@ __device__ __host__ Real hydroInverseCrossingTime(Real const &E,
  * \param[in] gamma The adiabatic index
  * \return Real The maximum inverse crossing time in the cell
  */
-__device__ __host__ Real mhdInverseCrossingTime(Real const &E,
-                                                Real const &d,
-                                                Real const &d_inv,
-                                                Real const &vx,
-                                                Real const &vy,
-                                                Real const &vz,
-                                                Real const &avgBx,
-                                                Real const &avgBy,
-                                                Real const &avgBz,
-                                                Real const &dx,
-                                                Real const &dy,
-                                                Real const &dz,
+__device__ __host__ Real mhdInverseCrossingTime(Real const &E, Real const &d, Real const &d_inv, Real const &vx,
+                                                Real const &vy, Real const &vz, Real const &avgBx, Real const &avgBy,
+                                                Real const &avgBz, Real const &dx, Real const &dy, Real const &dz,
                                                 Real const &gamma);
 
-__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int n_fields, int nx, int ny, int nz, Real dx, Real dy, Real dz); 
+__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int n_fields, int nx, int ny,
+                           int nz, Real dx, Real dy, Real dz);
 
-Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma );
+Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz,
+                 Real gamma);
 
 __global__ void Sync_Energies_1D(Real *dev_conserved, int nx, int n_ghost, Real gamma, int n_fields);
 
-
 __global__ void Sync_Energies_2D(Real *dev_conserved, int nx, int ny, int n_ghost, Real gamma, int n_fields);
 
-
 __global__ void Sync_Energies_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, Real gamma, int n_fields);
 
 #ifdef AVERAGE_SLOW_CELLS
 
-void Average_Slow_Cells(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma, Real max_dti_slow );
+void Average_Slow_Cells(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy,
+                        Real dz, Real gamma, Real max_dti_slow);
 
-__global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma, Real max_dti_slow );
+__global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx,
+                                      Real dy, Real dz, Real gamma, Real max_dti_slow);
 #endif
 
-#ifdef TEMPERATURE_FLOOR
-__global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields,  Real U_floor );
-#endif
+void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real U_floor);
+
+__global__ void Temperature_Floor_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields,
+                                         Real U_floor);
+
+void Apply_Scalar_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, Real scalar_floor);
 
-__global__ void Partial_Update_Advected_Internal_Energy_1D( Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx, int n_ghost, Real dx, Real dt, Real gamma, int n_fields );
+__global__ void Scalar_Floor_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num,
+                                    Real scalar_floor);
 
-__global__ void Partial_Update_Advected_Internal_Energy_2D( Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, int nx, int ny, int n_ghost, Real dx, Real dy, Real dt, Real gamma, int n_fields );
+__global__ void Partial_Update_Advected_Internal_Energy_1D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx,
+                                                           int n_ghost, Real dx, Real dt, Real gamma, int n_fields);
 
-__global__ void Partial_Update_Advected_Internal_Energy_3D( Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, Real *Q_Lz, Real *Q_Rz, int nx, int ny, int nz,  int n_ghost, Real dx, Real dy, Real dz,  Real dt, Real gamma, int n_fields );
+__global__ void Partial_Update_Advected_Internal_Energy_2D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly,
+                                                           Real *Q_Ry, int nx, int ny, int n_ghost, Real dx, Real dy,
+                                                           Real dt, Real gamma, int n_fields);
 
-__global__ void Select_Internal_Energy_1D( Real *dev_conserved, int nx, int n_ghost, int n_fields );
+__global__ void Partial_Update_Advected_Internal_Energy_3D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly,
+                                                           Real *Q_Ry, Real *Q_Lz, Real *Q_Rz, int nx, int ny, int nz,
+                                                           int n_ghost, Real dx, Real dy, Real dz, Real dt, Real gamma,
+                                                           int n_fields);
 
-__global__ void Select_Internal_Energy_2D( Real *dev_conserved, int nx, int ny, int n_ghost, int n_fields );
+__global__ void Select_Internal_Energy_1D(Real *dev_conserved, int nx, int n_ghost, int n_fields);
 
-__global__ void Select_Internal_Energy_3D( Real *dev_conserved, int nx, int ny, int nz,  int n_ghost, int n_fields );
+__global__ void Select_Internal_Energy_2D(Real *dev_conserved, int nx, int ny, int n_ghost, int n_fields);
 
-__device__ void Average_Cell_All_Fields( int i, int j, int k, int nx, int ny, int nz, int ncells, int n_fields, Real *conserved );
+__global__ void Select_Internal_Energy_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields);
 
-__device__ Real Average_Cell_Single_Field( int field_indx, int i, int j, int k, int nx, int ny, int nz, int ncells, Real *conserved );
+__device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int nz, int ncells, int n_fields,
+                                        Real gamma, Real *conserved);
 
+__device__ Real Average_Cell_Single_Field(int field_indx, int i, int j, int k, int nx, int ny, int nz, int ncells,
+                                          Real *conserved);
 
-#endif //HYDRO_CUDA_H
-#endif //CUDA
+#endif  // HYDRO_CUDA_H
diff --git a/src/hydro/hydro_cuda_tests.cu b/src/hydro/hydro_cuda_tests.cu
index a6d00e96b..c289f1551 100644
--- a/src/hydro/hydro_cuda_tests.cu
+++ b/src/hydro/hydro_cuda_tests.cu
@@ -1,90 +1,80 @@
 /*!
-* \file hydro_cuda_tests.cu
-* \author Evan Schneider (evs34@pitt.edu)
-* \brief Test the code units within hydro_cuda.cu
-*
-*/
+ * \file hydro_cuda_tests.cu
+ * \author Evan Schneider (evs34@pitt.edu)
+ * \brief Test the code units within hydro_cuda.cu
+ *
+ */
 
 // STL Includes
+#include <stdlib.h>
+
 #include <iostream>
-#include <vector>
 #include <string>
-#include <stdlib.h>
+#include <vector>
 
 // External Includes
-#include <gtest/gtest.h>    // Include GoogleTest and related libraries/headers
+#include <gtest/gtest.h>  // Include GoogleTest and related libraries/headers
 
 // Local Includes
 #include "../global/global.h"
 #include "../global/global_cuda.h"
+#include "../hydro/hydro_cuda.h"  // Include code to test
+#include "../utils/DeviceVector.h"
 #include "../utils/gpu.hpp"
 #include "../utils/testing_utilities.h"
-#include "../hydro/hydro_cuda.h"   // Include code to test
-
-#if defined(CUDA)
 
 // =============================================================================
 // Tests for the Calc_dt_GPU function
 // =============================================================================
 TEST(tHYDROCalcDt3D, CorrectInputExpectCorrectOutput)
 {
-
-  Real* testDt;
-  cudaHostAlloc(&testDt, sizeof(Real), cudaHostAllocDefault);
-
   // Call the function we are testing
   int num_blocks = 1;
   dim3 dim1dGrid(num_blocks, 1, 1);
   dim3 dim1dBlock(TPB, 1, 1);
-  int const nx = 1;
-  int const ny = 1;
-  int const nz = 1;
-  int const n_fields   = 5;  // Total number of conserved fields
-  int const  n_ghost = 0;
-  Real dx = 1.0;
-  Real dy = 1.0;
-  Real dz = 1.0;
-  Real *host_conserved;
-  Real *dev_conserved;
-  Real *dev_dti_array;
-  Real gamma = 5.0/3.0;
-
-  // Allocate host and device arrays and copy data
-  cudaHostAlloc(&host_conserved, n_fields*sizeof(Real), cudaHostAllocDefault);
-  CudaSafeCall(cudaMalloc(&dev_conserved, n_fields*sizeof(Real)));
-  CudaSafeCall(cudaMalloc(&dev_dti_array, sizeof(Real)));
+  int const nx       = 1;
+  int const ny       = 1;
+  int const nz       = 1;
+  int const n_fields = 5;  // Total number of conserved fields
+  int const n_ghost  = 0;
+  Real dx            = 1.0;
+  Real dy            = 1.0;
+  Real dz            = 1.0;
+  std::vector<Real> host_conserved(n_fields);
+  cuda_utilities::DeviceVector<Real> dev_conserved(n_fields);
+  cuda_utilities::DeviceVector<Real> dev_dti(1);
+  Real gamma = 5.0 / 3.0;
 
   // Set values of conserved variables for input (host)
-  host_conserved[0] = 1.0; // density
-  host_conserved[1] = 0.0; // x momentum
-  host_conserved[2] = 0.0; // y momentum
-  host_conserved[3] = 0.0; // z momentum
-  host_conserved[4] = 1.0; // Energy
+  host_conserved.at(0) = 1.0;  // density
+  host_conserved.at(1) = 0.0;  // x momentum
+  host_conserved.at(2) = 0.0;  // y momentum
+  host_conserved.at(3) = 0.0;  // z momentum
+  host_conserved.at(4) = 1.0;  // Energy
 
   // Copy host data to device arrray
-  CudaSafeCall(cudaMemcpy(dev_conserved, host_conserved, n_fields*sizeof(Real), cudaMemcpyHostToDevice));
-  //__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int n_fields, int nx, int ny, int nz, Real dx, Real dy, Real dz)                        
+  dev_conserved.cpyHostToDevice(host_conserved);
+  //__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma,
+  // int n_ghost, int n_fields, int nx, int ny, int nz, Real dx, Real dy, Real
+  // dz)
 
   // Run the kernel
-  hipLaunchKernelGGL(Calc_dt_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_dti_array, gamma, n_ghost, n_fields, nx, ny, nz, dx, dy, dz);
-  CudaCheckError();
-
-  // Copy the dt value back from the GPU
-  CudaSafeCall(cudaMemcpy(testDt, dev_dti_array, sizeof(Real), cudaMemcpyDeviceToHost));
+  hipLaunchKernelGGL(Calc_dt_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved.data(), dev_dti.data(), gamma, n_ghost,
+                     n_fields, nx, ny, nz, dx, dy, dz);
+  GPU_Error_Check();
 
   // Compare results
   // Check for equality and if not equal return difference
-  double fiducialDt = 1.0540925533894598;
-  double testData = testDt[0];
+  double const fiducialDt = 1.0540925533894598;
+  double const testData   = dev_dti.at(0);
   double absoluteDiff;
   int64_t ulpsDiff;
   bool areEqual;
-  areEqual = testingUtilities::nearlyEqualDbl(fiducialDt, testData, absoluteDiff, ulpsDiff);
-  EXPECT_TRUE(areEqual)
-    << "The fiducial value is:       " << fiducialDt << std::endl
-    << "The test value is:           " << testData     << std::endl
-    << "The absolute difference is:  " << absoluteDiff << std::endl
-    << "The ULP difference is:       " << ulpsDiff     << std::endl;
+  areEqual = testing_utilities::nearlyEqualDbl(fiducialDt, testData, absoluteDiff, ulpsDiff);
+  EXPECT_TRUE(areEqual) << "The fiducial value is:       " << fiducialDt << std::endl
+                        << "The test value is:           " << testData << std::endl
+                        << "The absolute difference is:  " << absoluteDiff << std::endl
+                        << "The ULP difference is:       " << ulpsDiff << std::endl;
 }
 // =============================================================================
 // End of tests for the Calc_dt_GPU function
@@ -93,37 +83,28 @@ TEST(tHYDROCalcDt3D, CorrectInputExpectCorrectOutput)
 // =============================================================================
 // Tests for the hydroInverseCrossingTime function
 // =============================================================================
-TEST(tHYDROHydroInverseCrossingTime,
-     CorrectInputExpectCorrectOutput)
+TEST(tHYDROHydroInverseCrossingTime, CorrectInputExpectCorrectOutput)
 {
-// Set test values
-double const energy    = 7.6976906577e2;
-double const density   = 1.6756968986;
-double const velocityX = 7.0829278656;
-double const velocityY = 5.9283073464;
-double const velocityZ = 8.8417748226;
-double const cellSizeX = 8.1019429453e2;
-double const cellSizeY = 7.1254780684e2;
-double const cellSizeZ = 7.5676716066e2;
-double const gamma = 5./3.;
-
-// Fiducial Values
-double const fiducialInverseCrossingTime = 0.038751126881804446;
-
-// Function to test
-double testInverseCrossingTime = hydroInverseCrossingTime(energy,
-                                                         density,
-                                                         1./density,
-                                                         velocityX,
-                                                         velocityY,
-                                                         velocityZ,
-                                                         cellSizeX,
-                                                         cellSizeY,
-                                                         cellSizeZ,
-                                                         gamma);
-
-// Check results
-testingUtilities::checkResults(fiducialInverseCrossingTime, testInverseCrossingTime, "inverse crossing time");
+  // Set test values
+  double const energy    = 7.6976906577e2;
+  double const density   = 1.6756968986;
+  double const velocityX = 7.0829278656;
+  double const velocityY = 5.9283073464;
+  double const velocityZ = 8.8417748226;
+  double const cellSizeX = 8.1019429453e2;
+  double const cellSizeY = 7.1254780684e2;
+  double const cellSizeZ = 7.5676716066e2;
+  double const gamma     = 5. / 3.;
+
+  // Fiducial Values
+  double const fiducialInverseCrossingTime = 0.038751126881804446;
+
+  // Function to test
+  double testInverseCrossingTime = hydroInverseCrossingTime(energy, density, 1. / density, velocityX, velocityY,
+                                                            velocityZ, cellSizeX, cellSizeY, cellSizeZ, gamma);
+
+  // Check results
+  testing_utilities::Check_Results(fiducialInverseCrossingTime, testInverseCrossingTime, "inverse crossing time");
 }
 // =============================================================================
 // End of tests for the hydroInverseCrossingTime function
@@ -132,8 +113,7 @@ testingUtilities::checkResults(fiducialInverseCrossingTime, testInverseCrossingT
 // =============================================================================
 // Tests for the mhdInverseCrossingTime function
 // =============================================================================
-TEST(tMHDMhdInverseCrossingTime,
-     CorrectInputExpectCorrectOutput)
+TEST(tMHDMhdInverseCrossingTime, CorrectInputExpectCorrectOutput)
 {
   // Set test values
   double const energy    = 7.6976906577e2;
@@ -147,32 +127,66 @@ TEST(tMHDMhdInverseCrossingTime,
   double const cellSizeX = 8.1019429453e2;
   double const cellSizeY = 7.1254780684e2;
   double const cellSizeZ = 7.5676716066e2;
-  double const gamma = 5./3.;
+  double const gamma     = 5. / 3.;
 
   // Fiducial Values
   double const fiducialInverseCrossingTime = 0.038688028391959103;
 
   // Function to test
-  double testInverseCrossingTime = mhdInverseCrossingTime(energy,
-                                                          density,
-                                                          1./density,
-                                                          velocityX,
-                                                          velocityY,
-                                                          velocityZ,
-                                                          magneticX,
-                                                          magneticY,
-                                                          magneticZ,
-                                                          cellSizeX,
-                                                          cellSizeY,
-                                                          cellSizeZ,
-                                                          gamma);
-
+  double testInverseCrossingTime =
+      mhdInverseCrossingTime(energy, density, 1. / density, velocityX, velocityY, velocityZ, magneticX, magneticY,
+                             magneticZ, cellSizeX, cellSizeY, cellSizeZ, gamma);
 
   // Check results
-  testingUtilities::checkResults(fiducialInverseCrossingTime, testInverseCrossingTime, "inverse crossing time");
+  testing_utilities::Check_Results(fiducialInverseCrossingTime, testInverseCrossingTime, "inverse crossing time");
 }
 // =============================================================================
 // End of tests for the mhdInverseCrossingTime function
 // =============================================================================
 
-#endif  // CUDA
+TEST(tHYDROScalarFloor, CorrectInputExpectCorrectOutput)
+{
+  int num_blocks = 1;
+  dim3 dim1dGrid(num_blocks, 1, 1);
+  dim3 dim1dBlock(TPB, 1, 1);
+  int const nx        = 1;
+  int const ny        = 1;
+  int const nz        = 1;
+  int const n_fields  = 6;  // 5 conserved + 1 scalar
+  int const n_ghost   = 0;
+  int const field_num = 5;  // scalar field index
+
+  // initialize host and device conserved arrays
+  std::vector<Real> host_conserved(n_fields);
+  cuda_utilities::DeviceVector<Real> dev_conserved(n_fields);
+
+  // Set values of conserved variables for input (host)
+  host_conserved.at(0) = 0.0;  // density
+  host_conserved.at(1) = 0.0;  // x momentum
+  host_conserved.at(2) = 0.0;  // y momentum
+  host_conserved.at(3) = 0.0;  // z momentum
+  host_conserved.at(4) = 0.0;  // energy
+
+  Real scalar_floor = 1.0;  // minimum allowed value for scalar field
+
+  // Case where scalar is below the floor
+  host_conserved.at(field_num) = 0.0;  // scalar
+  dev_conserved.cpyHostToDevice(host_conserved);
+  hipLaunchKernelGGL(Scalar_Floor_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved.data(), nx, ny, nz, n_ghost,
+                     field_num, scalar_floor);
+  testing_utilities::Check_Results(scalar_floor, dev_conserved.at(field_num), "below floor");
+
+  // Case where scalar is above the floor
+  host_conserved.at(field_num) = 2.0;  // scalar
+  dev_conserved.cpyHostToDevice(host_conserved);
+  hipLaunchKernelGGL(Scalar_Floor_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved.data(), nx, ny, nz, n_ghost,
+                     field_num, scalar_floor);
+  testing_utilities::Check_Results(host_conserved.at(field_num), dev_conserved.at(field_num), "above floor");
+
+  // Case where scalar is at the floor
+  host_conserved.at(field_num) = 1.0;  // scalar
+  dev_conserved.cpyHostToDevice(host_conserved);
+  hipLaunchKernelGGL(Scalar_Floor_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved.data(), nx, ny, nz, n_ghost,
+                     field_num, scalar_floor);
+  testing_utilities::Check_Results(host_conserved.at(field_num), dev_conserved.at(field_num), "at floor");
+}
\ No newline at end of file
diff --git a/src/integrators/VL_1D_cuda.cu b/src/integrators/VL_1D_cuda.cu
index 0eaecc899..f2ad520b8 100644
--- a/src/integrators/VL_1D_cuda.cu
+++ b/src/integrators/VL_1D_cuda.cu
@@ -1,153 +1,165 @@
 /*! \file VL_1D_cuda.cu
  *  \brief Definitions of the cuda VL algorithm functions. */
 
-#ifdef CUDA
 #ifdef VL
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include "../utils/gpu.hpp"
-#include "../global/global.h"
-#include "../global/global_cuda.h"
-#include "../hydro/hydro_cuda.h"
-#include "../integrators/VL_1D_cuda.h"
-#include "../reconstruction/pcm_cuda.h"
-#include "../reconstruction/plmp_cuda.h"
-#include "../reconstruction/plmc_cuda.h"
-#include "../reconstruction/ppmp_cuda.h"
-#include "../reconstruction/ppmc_cuda.h"
-#include "../riemann_solvers/exact_cuda.h"
-#include "../riemann_solvers/roe_cuda.h"
-#include "../riemann_solvers/hllc_cuda.h"
-#include "../utils/error_handling.h"
-#include "../io/io.h"
-
+  #include <math.h>
+  #include <stdio.h>
+  #include <stdlib.h>
+
+  #include "../global/global.h"
+  #include "../global/global_cuda.h"
+  #include "../hydro/hydro_cuda.h"
+  #include "../integrators/VL_1D_cuda.h"
+  #include "../io/io.h"
+  #include "../reconstruction/pcm_cuda.h"
+  #include "../reconstruction/plmc_cuda.h"
+  #include "../reconstruction/plmp_cuda.h"
+  #include "../reconstruction/ppmc_cuda.h"
+  #include "../reconstruction/ppmp_cuda.h"
+  #include "../riemann_solvers/exact_cuda.h"
+  #include "../riemann_solvers/hllc_cuda.h"
+  #include "../riemann_solvers/roe_cuda.h"
+  #include "../utils/error_handling.h"
+  #include "../utils/gpu.hpp"
 
 __global__ void Update_Conserved_Variables_1D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F,
-                                                   int n_cells, int n_ghost, Real dx, Real dt, Real gamma, int n_fields);
-
+                                                   int n_cells, int n_ghost, Real dx, Real dt, Real gamma,
+                                                   int n_fields);
 
-
-void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt, int n_fields)
+void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt,
+                          int n_fields, int custom_grav)
 {
-  //Here, *dev_conserved contains the entire
-  //set of conserved variables on the grid
+  // Here, *dev_conserved contains the entire
+  // set of conserved variables on the grid
 
-  int n_cells = nx;
-  int ny = 1;
-  int nz = 1;
-  int ngrid = (n_cells + TPB - 1) / TPB;
+  int n_cells             = nx;
+  [[maybe_unused]] int ny = 1;
+  [[maybe_unused]] int nz = 1;
+  int ngrid               = (n_cells + TPB - 1) / TPB;
 
   // set the dimensions of the cuda grid
   dim3 dimGrid(ngrid, 1, 1);
   dim3 dimBlock(TPB, 1, 1);
 
-  if ( !memory_allocated ) {
-
+  if (!memory_allocated) {
     // allocate memory on the GPU
     dev_conserved = d_conserved;
-    //CudaSafeCall( cudaMalloc((void**)&dev_conserved, n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&dev_conserved_half, n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&Q_Lx, n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&Q_Rx, n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&F_x,   n_fields*n_cells*sizeof(Real)) );
-
-    // If memory is single allocated: memory_allocated becomes true and successive timesteps won't allocate memory.
-    // If the memory is not single allocated: memory_allocated remains Null and memory is allocated every timestep.
+    // GPU_Error_Check( cudaMalloc((void**)&dev_conserved,
+    // n_fields*n_cells*sizeof(Real)) );
+    GPU_Error_Check(cudaMalloc((void **)&dev_conserved_half, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&F_x, n_fields * n_cells * sizeof(Real)));
+
+    // If memory is single allocated: memory_allocated becomes true and
+    // successive timesteps won't allocate memory. If the memory is not single
+    // allocated: memory_allocated remains Null and memory is allocated every
+    // timestep.
     memory_allocated = true;
   }
 
-  // Step 1: Use PCM reconstruction to put conserved variables into interface arrays
-  hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, gama, n_fields);
-  CudaCheckError();
+  // Step 1: Use PCM reconstruction to put conserved variables into interface
+  // arrays
+  hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, gama,
+                     n_fields);
+  GPU_Error_Check();
 
   // Step 2: Calculate first-order upwind fluxes
   #ifdef EXACT
-  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
+  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama,
+                     0, n_fields);
   #endif
   #ifdef ROE
-  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
+  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0,
+                     n_fields);
   #endif
   #ifdef HLLC
-  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
+  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0,
+                     n_fields);
   #endif
-  CudaCheckError();
-
+  GPU_Error_Check();
 
   // Step 3: Update the conserved variables half a timestep
-  hipLaunchKernelGGL(Update_Conserved_Variables_1D_half, dimGrid, dimBlock, 0, 0, dev_conserved, dev_conserved_half, F_x, n_cells, n_ghost, dx, 0.5*dt, gama, n_fields);
-  CudaCheckError();
-
+  hipLaunchKernelGGL(Update_Conserved_Variables_1D_half, dimGrid, dimBlock, 0, 0, dev_conserved, dev_conserved_half,
+                     F_x, n_cells, n_ghost, dx, 0.5 * dt, gama, n_fields);
+  GPU_Error_Check();
 
-  // Step 4: Construct left and right interface values using updated conserved variables
+  // Step 4: Construct left and right interface values using updated conserved
+  // variables
   #ifdef PCM
-  hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, n_ghost, gama, n_fields);
+  hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, n_ghost, gama,
+                     n_fields);
   #endif
   #ifdef PLMC
-  hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
+  hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0,
+                     n_fields);
   #endif
   #ifdef PLMP
-  hipLaunchKernelGGL(PLMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
+  hipLaunchKernelGGL(PLMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt,
+                     gama, 0, n_fields);
   #endif
   #ifdef PPMP
-  hipLaunchKernelGGL(PPMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
+  hipLaunchKernelGGL(PPMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt,
+                     gama, 0, n_fields);
   #endif
   #ifdef PPMC
-  hipLaunchKernelGGL(PPMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
+  hipLaunchKernelGGL(PPMC_VL, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama, 0);
   #endif
-  CudaCheckError();
-
+  GPU_Error_Check();
 
   // Step 5: Calculate the fluxes again
   #ifdef EXACT
-  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
+  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama,
+                     0, n_fields);
   #endif
   #ifdef ROE
-  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
+  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0,
+                     n_fields);
   #endif
   #ifdef HLLC
-  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
+  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0,
+                     n_fields);
   #endif
-  CudaCheckError();
+  GPU_Error_Check();
 
   #ifdef DE
-  // Compute the divergence of velocity before updating the conserved array, this solves synchronization issues when adding this term on Update_Conserved_Variables
-  hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_1D, dimGrid, dimBlock, 0, 0,  dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, dx, dt, gama, n_fields );
+  // Compute the divergence of velocity before updating the conserved array,
+  // this solves synchronization issues when adding this term on
+  // Update_Conserved_Variables
+  hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx,
+                     n_ghost, dx, dt, gama, n_fields);
   #endif
 
-
   // Step 6: Update the conserved variable array
-  hipLaunchKernelGGL(Update_Conserved_Variables_1D, dimGrid, dimBlock, 0, 0, dev_conserved, F_x, n_cells, x_off, n_ghost, dx, xbound, dt, gama, n_fields);
-  CudaCheckError();
-
+  hipLaunchKernelGGL(Update_Conserved_Variables_1D, dimGrid, dimBlock, 0, 0, dev_conserved, F_x, n_cells, x_off,
+                     n_ghost, dx, xbound, dt, gama, n_fields, custom_grav);
+  GPU_Error_Check();
 
   #ifdef DE
   hipLaunchKernelGGL(Select_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, dev_conserved, nx, n_ghost, n_fields);
   hipLaunchKernelGGL(Sync_Energies_1D, dimGrid, dimBlock, 0, 0, dev_conserved, nx, n_ghost, gama, n_fields);
-  CudaCheckError();
+  GPU_Error_Check();
   #endif
 
   return;
-
-
 }
 
-void Free_Memory_VL_1D() {
-
+void Free_Memory_VL_1D()
+{
   // free the GPU memory
   cudaFree(dev_conserved);
   cudaFree(dev_conserved_half);
   cudaFree(Q_Lx);
   cudaFree(Q_Rx);
   cudaFree(F_x);
-
 }
 
-__global__ void Update_Conserved_Variables_1D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F, int n_cells, int n_ghost, Real dx, Real dt, Real gamma, int n_fields)
+__global__ void Update_Conserved_Variables_1D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F,
+                                                   int n_cells, int n_ghost, Real dx, Real dt, Real gamma, int n_fields)
 {
   int id, imo;
-  Real dtodx = dt/dx;
+  Real dtodx = dt / dx;
 
   // get a global thread ID
   id = threadIdx.x + blockIdx.x * blockDim.x;
@@ -158,47 +170,48 @@ __global__ void Update_Conserved_Variables_1D_half(Real *dev_conserved, Real *de
   int ipo;
   #endif
 
-  // threads corresponding all cells except outer ring of ghost cells do the calculation
-  if (id > 0 && id < n_cells-1)
-  {
-    imo = id-1;
-    #ifdef DE
-    d  =  dev_conserved[            id];
+  // threads corresponding all cells except outer ring of ghost cells do the
+  // calculation
+  if (id > 0 && id < n_cells - 1) {
+    imo = id - 1;
+  #ifdef DE
+    d     = dev_conserved[id];
     d_inv = 1.0 / d;
-    vx =  dev_conserved[1*n_cells + id] * d_inv;
-    vy =  dev_conserved[2*n_cells + id] * d_inv;
-    vz =  dev_conserved[3*n_cells + id] * d_inv;
-    P  = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0);
-    //if (d < 0.0 || d != d) printf("Negative density before half step update.\n");
-    //if (P < 0.0) printf("%d Negative pressure before half step update.\n", id);
-    ipo = id+1;
-    vx_imo = dev_conserved[1*n_cells + imo] / dev_conserved[imo];
-    vx_ipo = dev_conserved[1*n_cells + ipo] / dev_conserved[ipo];
-    #endif
+    vx    = dev_conserved[1 * n_cells + id] * d_inv;
+    vy    = dev_conserved[2 * n_cells + id] * d_inv;
+    vz    = dev_conserved[3 * n_cells + id] * d_inv;
+    P     = (dev_conserved[4 * n_cells + id] - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0);
+    // if (d < 0.0 || d != d) printf("Negative density before half step
+    // update.\n"); if (P < 0.0) printf("%d Negative pressure before half step
+    // update.\n", id);
+    ipo    = id + 1;
+    vx_imo = dev_conserved[1 * n_cells + imo] / dev_conserved[imo];
+    vx_ipo = dev_conserved[1 * n_cells + ipo] / dev_conserved[ipo];
+  #endif
     // update the conserved variable array
-    dev_conserved_half[            id] = dev_conserved[            id] + dtodx * (dev_F[            imo] - dev_F[            id]);
-    dev_conserved_half[  n_cells + id] = dev_conserved[  n_cells + id] + dtodx * (dev_F[  n_cells + imo] - dev_F[  n_cells + id]);
-    dev_conserved_half[2*n_cells + id] = dev_conserved[2*n_cells + id] + dtodx * (dev_F[2*n_cells + imo] - dev_F[2*n_cells + id]);
-    dev_conserved_half[3*n_cells + id] = dev_conserved[3*n_cells + id] + dtodx * (dev_F[3*n_cells + imo] - dev_F[3*n_cells + id]);
-    dev_conserved_half[4*n_cells + id] = dev_conserved[4*n_cells + id] + dtodx * (dev_F[4*n_cells + imo] - dev_F[4*n_cells + id]);
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_conserved_half[(5+i)*n_cells + id] = dev_conserved[(5+i)*n_cells + id] + dtodx * (dev_F[(5+i)*n_cells + imo] - dev_F[(5+i)*n_cells + id]);
+    dev_conserved_half[id] = dev_conserved[id] + dtodx * (dev_F[imo] - dev_F[id]);
+    dev_conserved_half[n_cells + id] =
+        dev_conserved[n_cells + id] + dtodx * (dev_F[n_cells + imo] - dev_F[n_cells + id]);
+    dev_conserved_half[2 * n_cells + id] =
+        dev_conserved[2 * n_cells + id] + dtodx * (dev_F[2 * n_cells + imo] - dev_F[2 * n_cells + id]);
+    dev_conserved_half[3 * n_cells + id] =
+        dev_conserved[3 * n_cells + id] + dtodx * (dev_F[3 * n_cells + imo] - dev_F[3 * n_cells + id]);
+    dev_conserved_half[4 * n_cells + id] =
+        dev_conserved[4 * n_cells + id] + dtodx * (dev_F[4 * n_cells + imo] - dev_F[4 * n_cells + id]);
+  #ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dev_conserved_half[(5 + i) * n_cells + id] =
+          dev_conserved[(5 + i) * n_cells + id] +
+          dtodx * (dev_F[(5 + i) * n_cells + imo] - dev_F[(5 + i) * n_cells + id]);
     }
-    #endif
-    #ifdef DE
-    dev_conserved_half[(n_fields-1)*n_cells + id] = dev_conserved[(n_fields-1)*n_cells + id]
-                                       + dtodx * (dev_F[(n_fields-1)*n_cells + imo] - dev_F[(n_fields-1)*n_cells + id])
-                                       + 0.5*P*(dtodx*(vx_imo-vx_ipo));
-    #endif
+  #endif
+  #ifdef DE
+    dev_conserved_half[(n_fields - 1) * n_cells + id] =
+        dev_conserved[(n_fields - 1) * n_cells + id] +
+        dtodx * (dev_F[(n_fields - 1) * n_cells + imo] - dev_F[(n_fields - 1) * n_cells + id]) +
+        0.5 * P * (dtodx * (vx_imo - vx_ipo));
+  #endif
   }
-
-
 }
 
-
-
-
-
-#endif //VL
-#endif //CUDA
+#endif  // VL
diff --git a/src/integrators/VL_1D_cuda.h b/src/integrators/VL_1D_cuda.h
index 9b28958b0..3b7ff5425 100644
--- a/src/integrators/VL_1D_cuda.h
+++ b/src/integrators/VL_1D_cuda.h
@@ -1,16 +1,14 @@
 /*! \file VL_1D_cuda.h
  *  \brief Declarations for the cuda version of the 1D VL algorithm. */
 
-#ifdef CUDA
-
 #ifndef VL_1D_CUDA_H
 #define VL_1D_CUDA_H
 
 #include "../global/global.h"
 
-void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt, int n_fields);
+void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt,
+                          int n_fields, int custom_grav);
 
 void Free_Memory_VL_1D();
 
-#endif //VL_1D_CUDA_H
-#endif //CUDA
+#endif  // VL_1D_CUDA_H
diff --git a/src/integrators/VL_2D_cuda.cu b/src/integrators/VL_2D_cuda.cu
index 62b8aa97a..3c8992d71 100644
--- a/src/integrators/VL_2D_cuda.cu
+++ b/src/integrators/VL_2D_cuda.cu
@@ -1,152 +1,169 @@
 /*! \file VL_2D_cuda.cu
  *  \brief Definitions of the cuda 2D VL algorithm functions. */
 
-#ifdef CUDA
 #ifdef VL
 
-#include <stdio.h>
-#include <math.h>
-#include "../utils/gpu.hpp"
-#include "../global/global.h"
-#include "../global/global_cuda.h"
-#include "../hydro/hydro_cuda.h"
-#include "../integrators/VL_2D_cuda.h"
-#include "../reconstruction/pcm_cuda.h"
-#include "../reconstruction/plmp_cuda.h"
-#include "../reconstruction/plmc_cuda.h"
-#include "../reconstruction/ppmp_cuda.h"
-#include "../reconstruction/ppmc_cuda.h"
-#include "../riemann_solvers/exact_cuda.h"
-#include "../riemann_solvers/roe_cuda.h"
-#include "../riemann_solvers/hllc_cuda.h"
-
-
-__global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, Real *dev_conserved_half,
-                                                   Real *dev_F_x, Real *dev_F_y, int nx, int ny,
-                                                   int n_ghost, Real dx, Real dy, Real dt, Real gamma, int n_fields);
-
-
-void VL_Algorithm_2D_CUDA ( Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost,
-   Real dx, Real dy, Real xbound, Real ybound, Real dt, int n_fields)
+  #include <math.h>
+  #include <stdio.h>
+
+  #include "../global/global.h"
+  #include "../global/global_cuda.h"
+  #include "../hydro/hydro_cuda.h"
+  #include "../integrators/VL_2D_cuda.h"
+  #include "../reconstruction/pcm_cuda.h"
+  #include "../reconstruction/plmc_cuda.h"
+  #include "../reconstruction/plmp_cuda.h"
+  #include "../reconstruction/ppmc_cuda.h"
+  #include "../reconstruction/ppmp_cuda.h"
+  #include "../riemann_solvers/exact_cuda.h"
+  #include "../riemann_solvers/hllc_cuda.h"
+  #include "../riemann_solvers/roe_cuda.h"
+  #include "../utils/gpu.hpp"
+
+__global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x,
+                                                   Real *dev_F_y, int nx, int ny, int n_ghost, Real dx, Real dy,
+                                                   Real dt, Real gamma, int n_fields);
+
+void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy,
+                          Real xbound, Real ybound, Real dt, int n_fields, int custom_grav)
 {
+  // Here, *dev_conserved contains the entire
+  // set of conserved variables on the grid
+  // concatenated into a 1-d array
 
-  //Here, *dev_conserved contains the entire
-  //set of conserved variables on the grid
-  //concatenated into a 1-d array
-
-  int n_cells = nx*ny;
-  int nz = 1;
-  int ngrid = (n_cells + TPB - 1) / TPB;
+  int n_cells             = nx * ny;
+  [[maybe_unused]] int nz = 1;
+  int ngrid               = (n_cells + TPB - 1) / TPB;
 
   // set values for GPU kernels
   // number of blocks per 1D grid
   dim3 dim2dGrid(ngrid, 1, 1);
-  //number of threads per 1D block
+  // number of threads per 1D block
   dim3 dim1dBlock(TPB, 1, 1);
 
-
-  if ( !memory_allocated ) {
-
+  if (!memory_allocated) {
     // allocate GPU arrays
-    //CudaSafeCall( cudaMalloc((void**)&dev_conserved, n_fields*n_cells*sizeof(Real)) );
+    // GPU_Error_Check( cudaMalloc((void**)&dev_conserved,
+    // n_fields*n_cells*sizeof(Real)) );
     dev_conserved = d_conserved;
-    CudaSafeCall( cudaMalloc((void**)&dev_conserved_half, n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&Q_Lx, n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&Q_Rx, n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&Q_Ly, n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&Q_Ry, n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&F_x,  n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&F_y,  n_fields*n_cells*sizeof(Real)) );
-
-    // If memory is single allocated: memory_allocated becomes true and successive timesteps won't allocate memory.
-    // If the memory is not single allocated: memory_allocated remains Null and memory is allocated every timestep.
+    GPU_Error_Check(cudaMalloc((void **)&dev_conserved_half, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&Q_Ly, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&Q_Ry, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&F_x, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&F_y, n_fields * n_cells * sizeof(Real)));
+
+    // If memory is single allocated: memory_allocated becomes true and
+    // successive timesteps won't allocate memory. If the memory is not single
+    // allocated: memory_allocated remains Null and memory is allocated every
+    // timestep.
     memory_allocated = true;
   }
 
-  // Step 1: Use PCM reconstruction to put conserved variables into interface arrays
-  hipLaunchKernelGGL(PCM_Reconstruction_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny, n_ghost, gama, n_fields);
-  CudaCheckError();
-
+  // Step 1: Use PCM reconstruction to put conserved variables into interface
+  // arrays
+  hipLaunchKernelGGL(PCM_Reconstruction_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny,
+                     n_ghost, gama, n_fields);
+  GPU_Error_Check();
 
   // Step 2: Calculate first-order upwind fluxes
   #ifdef EXACT
-  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
-  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
+  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost,
+                     gama, 0, n_fields);
+  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost,
+                     gama, 1, n_fields);
   #endif
   #ifdef ROE
-  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
-  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
+  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama,
+                     0, n_fields);
+  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama,
+                     1, n_fields);
   #endif
   #ifdef HLLC
-  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
-  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
+  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost,
+                     gama, 0, n_fields);
+  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost,
+                     gama, 1, n_fields);
   #endif
-  CudaCheckError();
-
+  GPU_Error_Check();
 
   // Step 3: Update the conserved variables half a timestep
-  hipLaunchKernelGGL(Update_Conserved_Variables_2D_half, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, F_x, F_y, nx, ny, n_ghost, dx, dy, 0.5*dt, gama, n_fields);
-  CudaCheckError();
-
+  hipLaunchKernelGGL(Update_Conserved_Variables_2D_half, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half,
+                     F_x, F_y, nx, ny, n_ghost, dx, dy, 0.5 * dt, gama, n_fields);
+  GPU_Error_Check();
 
-  // Step 4: Construct left and right interface values using updated conserved variables
+  // Step 4: Construct left and right interface values using updated conserved
+  // variables
   #ifdef PLMP
-  hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
-  hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields);
+  hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx,
+                     dt, gama, 0, n_fields);
+  hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy,
+                     dt, gama, 1, n_fields);
   #endif
   #ifdef PLMC
-  hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
-  hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields);
+  hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama,
+                     0, n_fields);
+  hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama,
+                     1, n_fields);
   #endif
   #ifdef PPMP
-  hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
-  hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields);
-  #endif //PPMP
+  hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx,
+                     dt, gama, 0, n_fields);
+  hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy,
+                     dt, gama, 1, n_fields);
+  #endif  // PPMP
   #ifdef PPMC
-  hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
-  hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields);
-  #endif //PPMC
-  CudaCheckError();
-
+  hipLaunchKernelGGL(PPMC_VL, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama, 0);
+  hipLaunchKernelGGL(PPMC_VL, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, gama, 1);
+  #endif  // PPMC
+  GPU_Error_Check();
 
   // Step 5: Calculate the fluxes again
   #ifdef EXACT
-  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
-  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
+  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost,
+                     gama, 0, n_fields);
+  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost,
+                     gama, 1, n_fields);
   #endif
   #ifdef ROE
-  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
-  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
+  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama,
+                     0, n_fields);
+  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama,
+                     1, n_fields);
   #endif
   #ifdef HLLC
-  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
-  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
+  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost,
+                     gama, 0, n_fields);
+  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost,
+                     gama, 1, n_fields);
   #endif
-  CudaCheckError();
+  GPU_Error_Check();
 
   #ifdef DE
-  // Compute the divergence of velocity before updating the conserved array, this solves synchronization issues when adding this term on Update_Conserved_Variables
-  hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0,  dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny, n_ghost, dx, dy, dt, gama, n_fields );
+  // Compute the divergence of velocity before updating the conserved array,
+  // this solves synchronization issues when adding this term on
+  // Update_Conserved_Variables
+  hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx,
+                     Q_Ly, Q_Ry, nx, ny, n_ghost, dx, dy, dt, gama, n_fields);
   #endif
 
-
   // Step 6: Update the conserved variable array
-  hipLaunchKernelGGL(Update_Conserved_Variables_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, F_x, F_y, nx, ny, x_off, y_off, n_ghost, dx, dy, xbound, ybound, dt, gama, n_fields);
-  CudaCheckError();
-
+  hipLaunchKernelGGL(Update_Conserved_Variables_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, F_x, F_y, nx, ny, x_off,
+                     y_off, n_ghost, dx, dy, xbound, ybound, dt, gama, n_fields, custom_grav);
+  GPU_Error_Check();
 
   #ifdef DE
   hipLaunchKernelGGL(Select_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, n_fields);
   hipLaunchKernelGGL(Sync_Energies_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, gama, n_fields);
-  CudaCheckError();
+  GPU_Error_Check();
   #endif
 
   return;
-
 }
 
-void Free_Memory_VL_2D() {
-
+void Free_Memory_VL_2D()
+{
   // free the GPU memory
   cudaFree(dev_conserved);
   cudaFree(dev_conserved_half);
@@ -156,25 +173,25 @@ void Free_Memory_VL_2D() {
   cudaFree(Q_Ry);
   cudaFree(F_x);
   cudaFree(F_y);
-
 }
 
-
-__global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, Real *dev_F_y, int nx, int ny, int n_ghost, Real dx, Real dy, Real dt, Real gamma, int n_fields)
+__global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x,
+                                                   Real *dev_F_y, int nx, int ny, int n_ghost, Real dx, Real dy,
+                                                   Real dt, Real gamma, int n_fields)
 {
   int id, xid, yid, n_cells;
   int imo, jmo;
 
-  Real dtodx = dt/dx;
-  Real dtody = dt/dy;
+  Real dtodx = dt / dx;
+  Real dtody = dt / dy;
 
-  n_cells = nx*ny;
+  n_cells = nx * ny;
 
   // get a global thread ID
-  int blockId = blockIdx.x + blockIdx.y*gridDim.x;
-  id = threadIdx.x + blockId * blockDim.x;
-  yid = id / nx;
-  xid = id - yid*nx;
+  int blockId = blockIdx.x + blockIdx.y * gridDim.x;
+  id          = threadIdx.x + blockId * blockDim.x;
+  yid         = id / nx;
+  xid         = id - yid * nx;
 
   #ifdef DE
   Real d, d_inv, vx, vy, vz;
@@ -182,63 +199,58 @@ __global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, Real *de
   int ipo, jpo;
   #endif
 
-
   // all threads but one outer ring of ghost cells
-  if (xid > 0 && xid < nx-1 && yid > 0 && yid < ny-1)
-  {
-    imo = xid-1 + yid*nx;
-    jmo = xid + (yid-1)*nx;
-    #ifdef DE
-    d  =  dev_conserved[            id];
+  if (xid > 0 && xid < nx - 1 && yid > 0 && yid < ny - 1) {
+    imo = xid - 1 + yid * nx;
+    jmo = xid + (yid - 1) * nx;
+  #ifdef DE
+    d     = dev_conserved[id];
     d_inv = 1.0 / d;
-    vx =  dev_conserved[1*n_cells + id] * d_inv;
-    vy =  dev_conserved[2*n_cells + id] * d_inv;
-    vz =  dev_conserved[3*n_cells + id] * d_inv;
-    P  = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0);
-    //if (d < 0.0 || d != d) printf("Negative density before half step update.\n");
-    //if (P < 0.0) printf("%d Negative pressure before half step update.\n", id);
-    ipo = xid+1 + yid*nx;
-    jpo = xid + (yid+1)*nx;
-    vx_imo = dev_conserved[1*n_cells + imo] / dev_conserved[imo];
-    vx_ipo = dev_conserved[1*n_cells + ipo] / dev_conserved[ipo];
-    vy_jmo = dev_conserved[2*n_cells + jmo] / dev_conserved[jmo];
-    vy_jpo = dev_conserved[2*n_cells + jpo] / dev_conserved[jpo];
-    #endif
+    vx    = dev_conserved[1 * n_cells + id] * d_inv;
+    vy    = dev_conserved[2 * n_cells + id] * d_inv;
+    vz    = dev_conserved[3 * n_cells + id] * d_inv;
+    P     = (dev_conserved[4 * n_cells + id] - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0);
+    // if (d < 0.0 || d != d) printf("Negative density before half step
+    // update.\n"); if (P < 0.0) printf("%d Negative pressure before half step
+    // update.\n", id);
+    ipo    = xid + 1 + yid * nx;
+    jpo    = xid + (yid + 1) * nx;
+    vx_imo = dev_conserved[1 * n_cells + imo] / dev_conserved[imo];
+    vx_ipo = dev_conserved[1 * n_cells + ipo] / dev_conserved[ipo];
+    vy_jmo = dev_conserved[2 * n_cells + jmo] / dev_conserved[jmo];
+    vy_jpo = dev_conserved[2 * n_cells + jpo] / dev_conserved[jpo];
+  #endif
     // update the conserved variable array
-    dev_conserved_half[            id] = dev_conserved[            id]
-                                       + dtodx * (dev_F_x[            imo] - dev_F_x[            id])
-                                       + dtody * (dev_F_y[            jmo] - dev_F_y[            id]);
-    dev_conserved_half[  n_cells + id] = dev_conserved[  n_cells + id]
-                                       + dtodx * (dev_F_x[  n_cells + imo] - dev_F_x[  n_cells + id])
-                                       + dtody * (dev_F_y[  n_cells + jmo] - dev_F_y[  n_cells + id]);
-    dev_conserved_half[2*n_cells + id] = dev_conserved[2*n_cells + id]
-                                       + dtodx * (dev_F_x[2*n_cells + imo] - dev_F_x[2*n_cells + id])
-                                       + dtody * (dev_F_y[2*n_cells + jmo] - dev_F_y[2*n_cells + id]);
-    dev_conserved_half[3*n_cells + id] = dev_conserved[3*n_cells + id]
-                                       + dtodx * (dev_F_x[3*n_cells + imo] - dev_F_x[3*n_cells + id])
-                                       + dtody * (dev_F_y[3*n_cells + jmo] - dev_F_y[3*n_cells + id]);
-    dev_conserved_half[4*n_cells + id] = dev_conserved[4*n_cells + id]
-                                       + dtodx * (dev_F_x[4*n_cells + imo] - dev_F_x[4*n_cells + id])
-                                       + dtody * (dev_F_y[4*n_cells + jmo] - dev_F_y[4*n_cells + id]);
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_conserved_half[(5+i)*n_cells + id] = dev_conserved[(5+i)*n_cells + id]
-                                         + dtodx * (dev_F_x[(5+i)*n_cells + imo] - dev_F_x[(5+i)*n_cells + id])
-                                         + dtody * (dev_F_y[(5+i)*n_cells + jmo] - dev_F_y[(5+i)*n_cells + id]);
+    dev_conserved_half[id] =
+        dev_conserved[id] + dtodx * (dev_F_x[imo] - dev_F_x[id]) + dtody * (dev_F_y[jmo] - dev_F_y[id]);
+    dev_conserved_half[n_cells + id] = dev_conserved[n_cells + id] +
+                                       dtodx * (dev_F_x[n_cells + imo] - dev_F_x[n_cells + id]) +
+                                       dtody * (dev_F_y[n_cells + jmo] - dev_F_y[n_cells + id]);
+    dev_conserved_half[2 * n_cells + id] = dev_conserved[2 * n_cells + id] +
+                                           dtodx * (dev_F_x[2 * n_cells + imo] - dev_F_x[2 * n_cells + id]) +
+                                           dtody * (dev_F_y[2 * n_cells + jmo] - dev_F_y[2 * n_cells + id]);
+    dev_conserved_half[3 * n_cells + id] = dev_conserved[3 * n_cells + id] +
+                                           dtodx * (dev_F_x[3 * n_cells + imo] - dev_F_x[3 * n_cells + id]) +
+                                           dtody * (dev_F_y[3 * n_cells + jmo] - dev_F_y[3 * n_cells + id]);
+    dev_conserved_half[4 * n_cells + id] = dev_conserved[4 * n_cells + id] +
+                                           dtodx * (dev_F_x[4 * n_cells + imo] - dev_F_x[4 * n_cells + id]) +
+                                           dtody * (dev_F_y[4 * n_cells + jmo] - dev_F_y[4 * n_cells + id]);
+  #ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dev_conserved_half[(5 + i) * n_cells + id] =
+          dev_conserved[(5 + i) * n_cells + id] +
+          dtodx * (dev_F_x[(5 + i) * n_cells + imo] - dev_F_x[(5 + i) * n_cells + id]) +
+          dtody * (dev_F_y[(5 + i) * n_cells + jmo] - dev_F_y[(5 + i) * n_cells + id]);
     }
-    #endif
-    #ifdef DE
-    dev_conserved_half[(n_fields-1)*n_cells + id] = dev_conserved[(n_fields-1)*n_cells + id]
-                                       + dtodx * (dev_F_x[(n_fields-1)*n_cells + imo] - dev_F_x[(n_fields-1)*n_cells + id])
-                                       + dtody * (dev_F_y[(n_fields-1)*n_cells + jmo] - dev_F_y[(n_fields-1)*n_cells + id])
-                                       + 0.5*P*(dtodx*(vx_imo-vx_ipo) + dtody*(vy_jmo-vy_jpo));
-    #endif
+  #endif
+  #ifdef DE
+    dev_conserved_half[(n_fields - 1) * n_cells + id] =
+        dev_conserved[(n_fields - 1) * n_cells + id] +
+        dtodx * (dev_F_x[(n_fields - 1) * n_cells + imo] - dev_F_x[(n_fields - 1) * n_cells + id]) +
+        dtody * (dev_F_y[(n_fields - 1) * n_cells + jmo] - dev_F_y[(n_fields - 1) * n_cells + id]) +
+        0.5 * P * (dtodx * (vx_imo - vx_ipo) + dtody * (vy_jmo - vy_jpo));
+  #endif
   }
 }
 
-
-
-
-#endif //VL
-#endif //CUDA
-
+#endif  // VL
diff --git a/src/integrators/VL_2D_cuda.h b/src/integrators/VL_2D_cuda.h
index 355bee827..0ac7da807 100644
--- a/src/integrators/VL_2D_cuda.h
+++ b/src/integrators/VL_2D_cuda.h
@@ -1,17 +1,14 @@
 /*! \file VL_2D_cuda.h
  *  \brief Declarations for the cuda version of the 2D VL algorithm. */
 
-#ifdef CUDA
-
 #ifndef VL_2D_CUDA_H
 #define VL_2D_CUDA_H
 
 #include "../global/global.h"
 
-void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost,
-  Real dx, Real dy, Real xbound, Real ybound, Real dt, int n_fields);
+void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy,
+                          Real xbound, Real ybound, Real dt, int n_fields, int custom_grav);
 
 void Free_Memory_VL_2D();
 
-#endif //VL_2D_CUDA_H
-#endif //CUDA
+#endif  // VL_2D_CUDA_H
diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu
index 097708ede..5f9b30095 100644
--- a/src/integrators/VL_3D_cuda.cu
+++ b/src/integrators/VL_3D_cuda.cu
@@ -1,45 +1,50 @@
 /*! \file VL_3D_cuda.cu
- *  \brief Definitions of the cuda 3D VL algorithm functions. */
+ *  \brief Definitions of the cuda 3 D VL algorithm functions. MHD algorithm
+ *  from Stone & Gardiner 2009 "A simple unsplit Godunov method for
+ *  multidimensional MHD"
+ */
 
-#ifdef CUDA
 #ifdef VL
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include "../utils/gpu.hpp"
-#include "../utils/hydro_utilities.h"
-#include "../global/global.h"
-#include "../global/global_cuda.h"
-#include "../integrators/VL_3D_cuda.h"
-#include "../hydro/hydro_cuda.h"
-#include "../reconstruction/pcm_cuda.h"
-#include "../reconstruction/plmp_cuda.h"
-#include "../reconstruction/plmc_cuda.h"
-#include "../reconstruction/ppmp_cuda.h"
-#include "../reconstruction/ppmc_cuda.h"
-#include "../riemann_solvers/exact_cuda.h"
-#include "../riemann_solvers/roe_cuda.h"
-#include "../riemann_solvers/hllc_cuda.h"
-#include "../io/io.h"
-#include "../riemann_solvers/hll_cuda.h"
-
-__global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, Real *dev_F_y,  Real *dev_F_z, int nx, int ny, int nz, int n_ghost, Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields, Real density_floor);
-
-
+  #include <math.h>
+  #include <stdio.h>
+  #include <stdlib.h>
+
+  #include "../global/global.h"
+  #include "../global/global_cuda.h"
+  #include "../hydro/hydro_cuda.h"
+  #include "../integrators/VL_3D_cuda.h"
+  #include "../io/io.h"
+  #include "../mhd/ct_electric_fields.h"
+  #include "../mhd/magnetic_update.h"
+  #include "../reconstruction/pcm_cuda.h"
+  #include "../reconstruction/plmc_cuda.h"
+  #include "../reconstruction/plmp_cuda.h"
+  #include "../reconstruction/ppmc_cuda.h"
+  #include "../reconstruction/ppmp_cuda.h"
+  #include "../riemann_solvers/exact_cuda.h"
+  #include "../riemann_solvers/hll_cuda.h"
+  #include "../riemann_solvers/hllc_cuda.h"
+  #include "../riemann_solvers/hlld_cuda.h"
+  #include "../riemann_solvers/roe_cuda.h"
+  #include "../utils/gpu.hpp"
+  #include "../utils/hydro_utilities.h"
+
+__global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x,
+                                                   Real *dev_F_y, Real *dev_F_z, int nx, int ny, int nz, int n_ghost,
+                                                   Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields,
+                                                   Real density_floor);
 
 void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off,
-    int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound,
-    Real ybound, Real zbound, Real dt, int n_fields, Real density_floor,
-    Real U_floor, Real *host_grav_potential )
+                          int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound,
+                          Real dt, int n_fields, int custom_grav, Real density_floor, Real *host_grav_potential)
 {
+  // Here, *dev_conserved contains the entire
+  // set of conserved variables on the grid
+  // concatenated into a 1-d array
 
-  //Here, *dev_conserved contains the entire
-  //set of conserved variables on the grid
-  //concatenated into a 1-d array
-
-  int n_cells = nx*ny*nz;
-  int ngrid = (n_cells + TPB - 1) / TPB;
+  int n_cells = nx * ny * nz;
+  int ngrid   = (n_cells + TPB - 1) / TPB;
 
   // set values for GPU kernels
   // number of blocks per 1D grid
@@ -47,155 +52,315 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int
   //  number of threads per 1D block
   dim3 dim1dBlock(TPB, 1, 1);
 
-  //host_grav_potential is NULL if not using GRAVITY
+  // host_grav_potential is NULL if not using GRAVITY
   temp_potential = host_grav_potential;
 
-  if ( !memory_allocated ){
-
+  if (!memory_allocated) {
     // allocate memory on the GPU
-    //CudaSafeCall( cudaMalloc((void**)&dev_conserved, n_fields*n_cells*sizeof(Real)) );
     dev_conserved = d_conserved;
-    CudaSafeCall( cudaMalloc((void**)&dev_conserved_half, n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&Q_Lx,  n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&Q_Rx,  n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&Q_Ly,  n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&Q_Ry,  n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&Q_Lz,  n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&Q_Rz,  n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&F_x,   n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&F_y,   n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&F_z,   n_fields*n_cells*sizeof(Real)) );
-
-    #if defined( GRAVITY )
-    // CudaSafeCall( cudaMalloc((void**)&dev_grav_potential, n_cells*sizeof(Real)) );
+
+  // Set the size of the interface and flux arrays
+  #ifdef MHD
+    // In MHD/Constrained Transport the interface arrays have one fewer fields
+    // since the magnetic field that is stored on the face does not require
+    // reconstructions. Similarly the fluxes have one fewer fields since the
+    // magnetic field on that face doesn't have an associated flux. Each
+    // interface array store the magnetic fields on that interface that are
+    // not perpendicular to the interface and arranged cyclically. I.e. the
+    // `Q_Lx` interface store the reconstructed Y and Z magnetic fields in
+    // that order, the `Q_Ly` interface stores the Z and X mangetic fields in
+    // that order, and the `Q_Lz` interface stores the X and Y magnetic fields
+    // in that order. These fields can be indexed with the Q_?_dir grid_enums.
+    // The interface state arrays store in the interface on the "right" side of
+    // the cell, so the flux arrays store the fluxes through the right interface
+    //
+    // According to Stone et al. 2008 section 5.3 and the source code of
+    // Athena, the following equation relate the magnetic flux to the face
+    // centered electric fields/EMF. -cross(V,B)x is the negative of the
+    // x-component of V cross B. Note that "X" is the direction the solver is
+    // running in this case, not necessarily the true "X".
+    //  F_x[(grid_enum::fluxX_magnetic_z)*n_cells] = VxBy - BxVy =
+    //  -(-cross(V,B))z = -EMF_Z F_x[(grid_enum::fluxX_magnetic_y)*n_cells] =
+    //  VxBz - BxVz =  (-cross(V,B))y =  EMF_Y
+    //  F_y[(grid_enum::fluxY_magnetic_x)*n_cells] = VxBy - BxVy =
+    //  -(-cross(V,B))z = -EMF_X F_y[(grid_enum::fluxY_magnetic_z)*n_cells] =
+    //  VxBz - BxVz =  (-cross(V,B))y =  EMF_Z
+    //  F_z[(grid_enum::fluxZ_magnetic_y)*n_cells] = VxBy - BxVy =
+    //  -(-cross(V,B))z = -EMF_Y F_z[(grid_enum::fluxZ_magnetic_x)*n_cells] =
+    //  VxBz - BxVz =  (-cross(V,B))y =  EMF_X
+    size_t const arraySize   = (n_fields - 1) * n_cells * sizeof(Real);
+    size_t const ctArraySize = 3 * n_cells * sizeof(Real);
+  #else   // not MHD
+    size_t const arraySize = n_fields * n_cells * sizeof(Real);
+  #endif  // MHD
+    GPU_Error_Check(cudaMalloc((void **)&dev_conserved_half, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&Q_Lx, arraySize));
+    GPU_Error_Check(cudaMalloc((void **)&Q_Rx, arraySize));
+    GPU_Error_Check(cudaMalloc((void **)&Q_Ly, arraySize));
+    GPU_Error_Check(cudaMalloc((void **)&Q_Ry, arraySize));
+    GPU_Error_Check(cudaMalloc((void **)&Q_Lz, arraySize));
+    GPU_Error_Check(cudaMalloc((void **)&Q_Rz, arraySize));
+    GPU_Error_Check(cudaMalloc((void **)&F_x, arraySize));
+    GPU_Error_Check(cudaMalloc((void **)&F_y, arraySize));
+    GPU_Error_Check(cudaMalloc((void **)&F_z, arraySize));
+
+    cuda_utilities::initGpuMemory(dev_conserved_half, n_fields * n_cells * sizeof(Real));
+    cuda_utilities::initGpuMemory(Q_Lx, arraySize);
+    cuda_utilities::initGpuMemory(Q_Rx, arraySize);
+    cuda_utilities::initGpuMemory(Q_Ly, arraySize);
+    cuda_utilities::initGpuMemory(Q_Ry, arraySize);
+    cuda_utilities::initGpuMemory(Q_Lz, arraySize);
+    cuda_utilities::initGpuMemory(Q_Rz, arraySize);
+    cuda_utilities::initGpuMemory(F_x, arraySize);
+    cuda_utilities::initGpuMemory(F_y, arraySize);
+    cuda_utilities::initGpuMemory(F_z, arraySize);
+
+  #ifdef MHD
+    GPU_Error_Check(cudaMalloc((void **)&ctElectricFields, ctArraySize));
+  #endif  // MHD
+
+  #if defined(GRAVITY)
     dev_grav_potential = d_grav_potential;
-    #else
+  #else   // not GRAVITY
     dev_grav_potential = NULL;
-    #endif
+  #endif  // GRAVITY
 
-    // If memory is single allocated: memory_allocated becomes true and successive timesteps won't allocate memory.
-    // If the memory is not single allocated: memory_allocated remains Null and memory is allocated every timestep.
+    // If memory is single allocated: memory_allocated becomes true and
+    // successive timesteps won't allocate memory. If the memory is not single
+    // allocated: memory_allocated remains Null and memory is allocated every
+    // timestep.
     memory_allocated = true;
-
   }
 
-    #if defined( GRAVITY ) && !defined( GRAVITY_GPU )
-    CudaSafeCall( cudaMemcpy(dev_grav_potential, temp_potential, n_cells*sizeof(Real), cudaMemcpyHostToDevice) );
-    #endif
-
-
-    // Step 1: Use PCM reconstruction to put primitive variables into interface arrays
-    hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, gama, n_fields);
-    CudaCheckError();
-
-
-    // Step 2: Calculate first-order upwind fluxes
-    #ifdef EXACT
-    hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
-    hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
-    hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields);
-    #endif //EXACT
-    #ifdef ROE
-    hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
-    hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
-    hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields);
-    #endif //ROE
-    #ifdef HLLC
-    hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
-    hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
-    hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields);
-    #endif //HLLC
-    #ifdef HLL
-    hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
-    hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
-    hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields);
-    #endif //HLL
-    CudaCheckError();
-
-
-    // Step 3: Update the conserved variables half a timestep
-    hipLaunchKernelGGL(Update_Conserved_Variables_3D_half, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, F_x, F_y, F_z, nx, ny, nz, n_ghost, dx, dy, dz, 0.5*dt, gama, n_fields, density_floor );
-    CudaCheckError();
-
-
-    // Step 4: Construct left and right interface values using updated conserved variables
-    #ifdef PCM
-    hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, gama, n_fields);
-    #endif
-    #ifdef PLMP
-    hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
-    hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields);
-    hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields);
-    #endif //PLMP
-    #ifdef PLMC
-    hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
-    hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields);
-    hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields);
-    #endif
-    #ifdef PPMP
-    hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
-    hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields);
-    hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields);
-    #endif //PPMP
-    #ifdef PPMC
-    hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
-    hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields);
-    hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields);
-    #endif //PPMC
-    CudaCheckError();
-
-
-    // Step 5: Calculate the fluxes again
-    #ifdef EXACT
-    hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
-    hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
-    hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields);
-    #endif //EXACT
-    #ifdef ROE
-    hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
-    hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
-    hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields);
-    #endif //ROE
-    #ifdef HLLC
-    hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
-    hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
-    hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields);
-    #endif //HLLC
-    #ifdef HLL
-    hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
-    hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
-    hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields);
-    #endif //HLLC
-    CudaCheckError();
-
-    #ifdef DE
-    // Compute the divergence of Vel before updating the conserved array, this solves synchronization issues when adding this term on Update_Conserved_Variables_3D
-    hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0,  dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dx, dy, dz,  dt, gama, n_fields );
-    CudaCheckError();
-    #endif
+  #if defined(GRAVITY) && !defined(GRAVITY_GPU)
+  GPU_Error_Check(cudaMemcpy(dev_grav_potential, temp_potential, n_cells * sizeof(Real), cudaMemcpyHostToDevice));
+  #endif  // GRAVITY and GRAVITY_GPU
+
+  // Step 1: Use PCM reconstruction to put primitive variables into interface
+  // arrays
+  cuda_utilities::AutomaticLaunchParams static const pcm_launch_params(PCM_Reconstruction_3D, n_cells);
+  hipLaunchKernelGGL(PCM_Reconstruction_3D, pcm_launch_params.numBlocks, pcm_launch_params.threadsPerBlock, 0, 0,
+                     dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, gama, n_fields);
+  GPU_Error_Check();
+
+  // Step 2: Calculate first-order upwind fluxes
+  #ifdef EXACT
+  cuda_utilities::AutomaticLaunchParams static const exact_launch_params(Calculate_Exact_Fluxes_CUDA,
+                                                                         n_cellsCalculate_Exact_Fluxes_CUDA);
+  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, exact_launch_params.numBlocks, exact_launch_params.threadsPerBlock, 0,
+                     0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
+  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, exact_launch_params.numBlocks, exact_launch_params.threadsPerBlock, 0,
+                     0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
+  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, exact_launch_params.numBlocks, exact_launch_params.threadsPerBlock, 0,
+                     0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields);
+  #endif  // EXACT
+  #ifdef ROE
+  cuda_utilities::AutomaticLaunchParams static const roe_launch_params(Calculate_Roe_Fluxes_CUDA, n_cells);
+  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, roe_launch_params.numBlocks, roe_launch_params.threadsPerBlock, 0, 0,
+                     Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
+  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, roe_launch_params.numBlocks, roe_launch_params.threadsPerBlock, 0, 0,
+                     Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
+  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, roe_launch_params.numBlocks, roe_launch_params.threadsPerBlock, 0, 0,
+                     Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields);
+  #endif  // ROE
+  #ifdef HLLC
+  cuda_utilities::AutomaticLaunchParams static const hllc_launch_params(Calculate_HLLC_Fluxes_CUDA, n_cells);
+  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, hllc_launch_params.numBlocks, hllc_launch_params.threadsPerBlock, 0, 0,
+                     Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
+  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, hllc_launch_params.numBlocks, hllc_launch_params.threadsPerBlock, 0, 0,
+                     Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
+  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, hllc_launch_params.numBlocks, hllc_launch_params.threadsPerBlock, 0, 0,
+                     Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields);
+  #endif  // HLLC
+  #ifdef HLL
+  cuda_utilities::AutomaticLaunchParams static const hll_launch_params(Calculate_HLL_Fluxes_CUDA, n_cells);
+  hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, hll_launch_params.numBlocks, hll_launch_params.threadsPerBlock, 0, 0,
+                     Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
+  hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, hll_launch_params.numBlocks, hll_launch_params.threadsPerBlock, 0, 0,
+                     Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
+  hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, hll_launch_params.numBlocks, hll_launch_params.threadsPerBlock, 0, 0,
+                     Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields);
+  #endif  // HLL
+  #ifdef HLLD
+  cuda_utilities::AutomaticLaunchParams static const hlld_launch_params(mhd::Calculate_HLLD_Fluxes_CUDA, n_cells);
+  hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, hlld_launch_params.numBlocks, hlld_launch_params.threadsPerBlock,
+                     0, 0, Q_Lx, Q_Rx, &(dev_conserved[(grid_enum::magnetic_x)*n_cells]), F_x, n_cells, gama, 0,
+                     n_fields);
+  hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, hlld_launch_params.numBlocks, hlld_launch_params.threadsPerBlock,
+                     0, 0, Q_Ly, Q_Ry, &(dev_conserved[(grid_enum::magnetic_y)*n_cells]), F_y, n_cells, gama, 1,
+                     n_fields);
+  hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, hlld_launch_params.numBlocks, hlld_launch_params.threadsPerBlock,
+                     0, 0, Q_Lz, Q_Rz, &(dev_conserved[(grid_enum::magnetic_z)*n_cells]), F_z, n_cells, gama, 2,
+                     n_fields);
+  #endif  // HLLD
+  GPU_Error_Check();
+
+  #ifdef MHD
+  // Step 2.5: Compute the Constrained transport electric fields
+  cuda_utilities::AutomaticLaunchParams static const ct_launch_params(mhd::Calculate_CT_Electric_Fields, n_cells);
+  hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, ct_launch_params.numBlocks, ct_launch_params.threadsPerBlock, 0,
+                     0, F_x, F_y, F_z, dev_conserved, ctElectricFields, nx, ny, nz, n_cells);
+  GPU_Error_Check();
+  #endif  // MHD
+
+  // Step 3: Update the conserved variables half a timestep
+  cuda_utilities::AutomaticLaunchParams static const update_half_launch_params(Update_Conserved_Variables_3D_half,
+                                                                               n_cells);
+  hipLaunchKernelGGL(Update_Conserved_Variables_3D_half, update_half_launch_params.numBlocks,
+                     update_half_launch_params.threadsPerBlock, 0, 0, dev_conserved, dev_conserved_half, F_x, F_y, F_z,
+                     nx, ny, nz, n_ghost, dx, dy, dz, 0.5 * dt, gama, n_fields, density_floor);
+  GPU_Error_Check();
+
+  #ifdef MHD
+  // Update the magnetic fields
+  cuda_utilities::AutomaticLaunchParams static const update_magnetic_launch_params(mhd::Update_Magnetic_Field_3D,
+                                                                                   n_cells);
+  hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, update_magnetic_launch_params.numBlocks,
+                     update_magnetic_launch_params.threadsPerBlock, 0, 0, dev_conserved, dev_conserved_half,
+                     ctElectricFields, nx, ny, nz, n_cells, 0.5 * dt, dx, dy, dz);
+  GPU_Error_Check();
+  #endif  // MHD
+
+  // Step 4: Construct left and right interface values using updated conserved
+  // variables
+  #ifdef PCM
+  hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, Q_Ly, Q_Ry,
+                     Q_Lz, Q_Rz, nx, ny, nz, n_ghost, gama, n_fields);
+  #endif  // PCM
+  #ifdef PLMP
+  cuda_utilities::AutomaticLaunchParams static const plmp_launch_params(PLMP_cuda, n_cells);
+  hipLaunchKernelGGL(PLMP_cuda, plmp_launch_params.numBlocks, plmp_launch_params.threadsPerBlock, 0, 0,
+                     dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
+  hipLaunchKernelGGL(PLMP_cuda, plmp_launch_params.numBlocks, plmp_launch_params.threadsPerBlock, 0, 0,
+                     dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields);
+  hipLaunchKernelGGL(PLMP_cuda, plmp_launch_params.numBlocks, plmp_launch_params.threadsPerBlock, 0, 0,
+                     dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields);
+  #endif  // PLMP
+  #ifdef PLMC
+  cuda_utilities::AutomaticLaunchParams static const plmc_vl_launch_params(PLMC_cuda, n_cells);
+  hipLaunchKernelGGL(PLMC_cuda, plmc_vl_launch_params.numBlocks, plmc_vl_launch_params.threadsPerBlock, 0, 0,
+                     dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0, n_fields);
+  hipLaunchKernelGGL(PLMC_cuda, plmc_vl_launch_params.numBlocks, plmc_vl_launch_params.threadsPerBlock, 0, 0,
+                     dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1, n_fields);
+  hipLaunchKernelGGL(PLMC_cuda, plmc_vl_launch_params.numBlocks, plmc_vl_launch_params.threadsPerBlock, 0, 0,
+                     dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, 2, n_fields);
+  #endif  // PLMC
+  #ifdef PPMP
+  cuda_utilities::AutomaticLaunchParams static const ppmp_launch_params(PPMP_cuda, n_cells);
+  hipLaunchKernelGGL(PPMP_cuda, ppmp_launch_params.numBlocks, ppmp_launch_params.threadsPerBlock, 0, 0,
+                     dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
+  hipLaunchKernelGGL(PPMP_cuda, ppmp_launch_params.numBlocks, ppmp_launch_params.threadsPerBlock, 0, 0,
+                     dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields);
+  hipLaunchKernelGGL(PPMP_cuda, ppmp_launch_params.numBlocks, ppmp_launch_params.threadsPerBlock, 0, 0,
+                     dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields);
+  #endif  // PPMP
+  #ifdef PPMC
+  cuda_utilities::AutomaticLaunchParams static const ppmc_vl_launch_params(PPMC_VL, n_cells);
+  hipLaunchKernelGGL(PPMC_VL, ppmc_vl_launch_params.numBlocks, ppmc_vl_launch_params.threadsPerBlock, 0, 0,
+                     dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama, 0);
+  hipLaunchKernelGGL(PPMC_VL, ppmc_vl_launch_params.numBlocks, ppmc_vl_launch_params.threadsPerBlock, 0, 0,
+                     dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, gama, 1);
+  hipLaunchKernelGGL(PPMC_VL, ppmc_vl_launch_params.numBlocks, ppmc_vl_launch_params.threadsPerBlock, 0, 0,
+                     dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, gama, 2);
+  #endif  // PPMC
+  GPU_Error_Check();
+
+  // Step 5: Calculate the fluxes again
+  #ifdef EXACT
+  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, exact_launch_params.numBlocks, exact_launch_params.threadsPerBlock, 0,
+                     0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
+  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, exact_launch_params.numBlocks, exact_launch_params.threadsPerBlock, 0,
+                     0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
+  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, exact_launch_params.numBlocks, exact_launch_params.threadsPerBlock, 0,
+                     0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields);
+  #endif  // EXACT
+  #ifdef ROE
+  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, roe_launch_params.numBlocks, roe_launch_params.threadsPerBlock, 0, 0,
+                     Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
+  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, roe_launch_params.numBlocks, roe_launch_params.threadsPerBlock, 0, 0,
+                     Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
+  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, roe_launch_params.numBlocks, roe_launch_params.threadsPerBlock, 0, 0,
+                     Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields);
+  #endif  // ROE
+  #ifdef HLLC
+  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, hllc_launch_params.numBlocks, hllc_launch_params.threadsPerBlock, 0, 0,
+                     Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
+  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, hllc_launch_params.numBlocks, hllc_launch_params.threadsPerBlock, 0, 0,
+                     Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
+  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, hllc_launch_params.numBlocks, hllc_launch_params.threadsPerBlock, 0, 0,
+                     Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields);
+  #endif  // HLLC
+  #ifdef HLL
+  hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, hll_launch_params.numBlocks, hll_launch_params.threadsPerBlock, 0, 0,
+                     Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
+  hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, hll_launch_params.numBlocks, hll_launch_params.threadsPerBlock, 0, 0,
+                     Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
+  hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, hll_launch_params.numBlocks, hll_launch_params.threadsPerBlock, 0, 0,
+                     Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields);
+  #endif  // HLLC
+  #ifdef HLLD
+  hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, hlld_launch_params.numBlocks, hlld_launch_params.threadsPerBlock,
+                     0, 0, Q_Lx, Q_Rx, &(dev_conserved_half[(grid_enum::magnetic_x)*n_cells]), F_x, n_cells, gama, 0,
+                     n_fields);
+  hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, hlld_launch_params.numBlocks, hlld_launch_params.threadsPerBlock,
+                     0, 0, Q_Ly, Q_Ry, &(dev_conserved_half[(grid_enum::magnetic_y)*n_cells]), F_y, n_cells, gama, 1,
+                     n_fields);
+  hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, hlld_launch_params.numBlocks, hlld_launch_params.threadsPerBlock,
+                     0, 0, Q_Lz, Q_Rz, &(dev_conserved_half[(grid_enum::magnetic_z)*n_cells]), F_z, n_cells, gama, 2,
+                     n_fields);
+  #endif  // HLLD
+  GPU_Error_Check();
 
+  #ifdef DE
+  // Compute the divergence of Vel before updating the conserved array, this
+  // solves synchronization issues when adding this term on
+  // Update_Conserved_Variables_3D
+  cuda_utilities::AutomaticLaunchParams static const de_advect_launch_params(Partial_Update_Advected_Internal_Energy_3D,
+                                                                             n_cells);
+  hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_3D, de_advect_launch_params.numBlocks,
+                     de_advect_launch_params.threadsPerBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz,
+                     nx, ny, nz, n_ghost, dx, dy, dz, dt, gama, n_fields);
+  GPU_Error_Check();
+  #endif  // DE
+
+  #ifdef MHD
+  // Step 5.5: Compute the Constrained transport electric fields
+  hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, ct_launch_params.numBlocks, ct_launch_params.threadsPerBlock, 0,
+                     0, F_x, F_y, F_z, dev_conserved_half, ctElectricFields, nx, ny, nz, n_cells);
+  GPU_Error_Check();
+  #endif  // MHD
+
+  // Step 6: Update the conserved variable array
+  cuda_utilities::AutomaticLaunchParams static const update_full_launch_params(Update_Conserved_Variables_3D, n_cells);
+  hipLaunchKernelGGL(Update_Conserved_Variables_3D, update_full_launch_params.numBlocks,
+                     update_full_launch_params.threadsPerBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz,
+                     F_x, F_y, F_z, nx, ny, nz, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, zbound, dt,
+                     gama, n_fields, custom_grav, density_floor, dev_grav_potential);
+  GPU_Error_Check();
+
+  #ifdef MHD
+  // Update the magnetic fields
+  hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, update_magnetic_launch_params.numBlocks,
+                     update_magnetic_launch_params.threadsPerBlock, 0, 0, dev_conserved, dev_conserved,
+                     ctElectricFields, nx, ny, nz, n_cells, dt, dx, dy, dz);
+  GPU_Error_Check();
+  #endif  // MHD
 
-    // Step 6: Update the conserved variable array
-    hipLaunchKernelGGL(Update_Conserved_Variables_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, F_x, F_y, F_z, nx, ny, nz, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, zbound, dt, gama, n_fields, density_floor, dev_grav_potential);
-    CudaCheckError();
+  #ifdef DE
+  cuda_utilities::AutomaticLaunchParams static const de_select_launch_params(Select_Internal_Energy_3D, n_cells);
+  hipLaunchKernelGGL(Select_Internal_Energy_3D, de_select_launch_params.numBlocks,
+                     de_select_launch_params.threadsPerBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields);
+  cuda_utilities::AutomaticLaunchParams static const de_sync_launch_params(Sync_Energies_3D, n_cells);
+  hipLaunchKernelGGL(Sync_Energies_3D, de_sync_launch_params.numBlocks, de_sync_launch_params.threadsPerBlock, 0, 0,
+                     dev_conserved, nx, ny, nz, n_ghost, gama, n_fields);
+  GPU_Error_Check();
+  #endif  // DE
 
-    #ifdef DE
-    hipLaunchKernelGGL(Select_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields);
-    hipLaunchKernelGGL(Sync_Energies_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, gama, n_fields);
-    CudaCheckError();
-    #endif
-
-    #ifdef TEMPERATURE_FLOOR
-    hipLaunchKernelGGL(Apply_Temperature_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, U_floor );
-    CudaCheckError();
-    #endif //TEMPERATURE_FLOOR
   return;
-
 }
 
-
-void Free_Memory_VL_3D(){
-
+void Free_Memory_VL_3D()
+{
   // free the GPU memory
   cudaFree(dev_conserved);
   cudaFree(dev_conserved_half);
@@ -208,128 +373,125 @@ void Free_Memory_VL_3D(){
   cudaFree(F_x);
   cudaFree(F_y);
   cudaFree(F_z);
-
+  cudaFree(ctElectricFields);
 }
 
-__global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, Real *dev_F_y,  Real *dev_F_z, int nx, int ny, int nz, int n_ghost, Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields, Real density_floor )
+__global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x,
+                                                   Real *dev_F_y, Real *dev_F_z, int nx, int ny, int nz, int n_ghost,
+                                                   Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields,
+                                                   Real density_floor)
 {
-  Real dtodx = dt/dx;
-  Real dtody = dt/dy;
-  Real dtodz = dt/dz;
-  int n_cells = nx*ny*nz;
+  Real dtodx  = dt / dx;
+  Real dtody  = dt / dy;
+  Real dtodz  = dt / dz;
+  int n_cells = nx * ny * nz;
 
   // get a global thread ID
   int tid = threadIdx.x + blockIdx.x * blockDim.x;
-  int zid = tid / (nx*ny);
-  int yid = (tid - zid*nx*ny) / nx;
-  int xid = tid - zid*nx*ny - yid*nx;
-  int id = xid + yid*nx + zid*nx*ny;
+  int zid = tid / (nx * ny);
+  int yid = (tid - zid * nx * ny) / nx;
+  int xid = tid - zid * nx * ny - yid * nx;
+  int id  = xid + yid * nx + zid * nx * ny;
 
-  int imo = xid-1 + yid*nx + zid*nx*ny;
-  int jmo = xid + (yid-1)*nx + zid*nx*ny;
-  int kmo = xid + yid*nx + (zid-1)*nx*ny;
+  int imo = xid - 1 + yid * nx + zid * nx * ny;
+  int jmo = xid + (yid - 1) * nx + zid * nx * ny;
+  int kmo = xid + yid * nx + (zid - 1) * nx * ny;
 
   #ifdef DE
   Real d, d_inv, vx, vy, vz;
   Real vx_imo, vx_ipo, vy_jmo, vy_jpo, vz_kmo, vz_kpo, P, E, E_kin, GE;
   int ipo, jpo, kpo;
-  #endif
-
-  #ifdef DENSITY_FLOOR
-  Real dens_0;
-  #endif
+  #endif  // DE
 
-  // threads corresponding to all cells except outer ring of ghost cells do the calculation
-  if (xid > 0 && xid < nx-1 && yid > 0 && yid < ny-1 && zid > 0 && zid < nz-1)
-  {
-    #ifdef DE
-    d  =  dev_conserved[            id];
+  // threads corresponding to all cells except outer ring of ghost cells do the
+  // calculation
+  if (xid > 0 && xid < nx - 1 && yid > 0 && yid < ny - 1 && zid > 0 && zid < nz - 1) {
+  #ifdef DE
+    d     = dev_conserved[id];
     d_inv = 1.0 / d;
-    vx =  dev_conserved[1*n_cells + id] * d_inv;
-    vy =  dev_conserved[2*n_cells + id] * d_inv;
-    vz =  dev_conserved[3*n_cells + id] * d_inv;
-    //PRESSURE_DE
-    E = dev_conserved[4*n_cells + id];
-    GE = dev_conserved[(n_fields-1)*n_cells + id];
-    E_kin = 0.5 * d * ( vx*vx + vy*vy + vz*vz );
-    P = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, GE, gamma );
-    P  = fmax(P, (Real) TINY_NUMBER);
-    // P  = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0);
-    //if (d < 0.0 || d != d) printf("Negative density before half step update.\n");
-    //if (P < 0.0) printf("%d Negative pressure before half step update.\n", id);
-    ipo = xid+1 + yid*nx + zid*nx*ny;
-    jpo = xid + (yid+1)*nx + zid*nx*ny;
-    kpo = xid + yid*nx + (zid+1)*nx*ny;
-    vx_imo = dev_conserved[1*n_cells + imo] / dev_conserved[imo];
-    vx_ipo = dev_conserved[1*n_cells + ipo] / dev_conserved[ipo];
-    vy_jmo = dev_conserved[2*n_cells + jmo] / dev_conserved[jmo];
-    vy_jpo = dev_conserved[2*n_cells + jpo] / dev_conserved[jpo];
-    vz_kmo = dev_conserved[3*n_cells + kmo] / dev_conserved[kmo];
-    vz_kpo = dev_conserved[3*n_cells + kpo] / dev_conserved[kpo];
-    #endif
+    vx    = dev_conserved[1 * n_cells + id] * d_inv;
+    vy    = dev_conserved[2 * n_cells + id] * d_inv;
+    vz    = dev_conserved[3 * n_cells + id] * d_inv;
+    // PRESSURE_DE
+    E     = dev_conserved[4 * n_cells + id];
+    GE    = dev_conserved[(n_fields - 1) * n_cells + id];
+    E_kin = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(d, vx, vy, vz);
+    #ifdef MHD
+    // Add the magnetic energy
+    auto const [centeredBx, centeredBy, centeredBz] =
+        mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny);
+    E_kin += mhd::utils::computeMagneticEnergy(centeredBx, centeredBy, centeredBz);
+    #endif  // MHD
+    P = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, GE, gamma);
+    P = fmax(P, (Real)TINY_NUMBER);
+    // P  = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) *
+    // (gamma - 1.0);
+    // if (d < 0.0 || d != d) printf("Negative density before half step
+    // update.\n"); if (P < 0.0) printf("%d Negative pressure before half step
+    // update.\n", id);
+    ipo    = xid + 1 + yid * nx + zid * nx * ny;
+    jpo    = xid + (yid + 1) * nx + zid * nx * ny;
+    kpo    = xid + yid * nx + (zid + 1) * nx * ny;
+    vx_imo = dev_conserved[1 * n_cells + imo] / dev_conserved[imo];
+    vx_ipo = dev_conserved[1 * n_cells + ipo] / dev_conserved[ipo];
+    vy_jmo = dev_conserved[2 * n_cells + jmo] / dev_conserved[jmo];
+    vy_jpo = dev_conserved[2 * n_cells + jpo] / dev_conserved[jpo];
+    vz_kmo = dev_conserved[3 * n_cells + kmo] / dev_conserved[kmo];
+    vz_kpo = dev_conserved[3 * n_cells + kpo] / dev_conserved[kpo];
+  #endif  // DE
 
     // update the conserved variable array
-    dev_conserved_half[            id] = dev_conserved[            id]
-                                       + dtodx * (dev_F_x[            imo] - dev_F_x[            id])
-                                       + dtody * (dev_F_y[            jmo] - dev_F_y[            id])
-                                       + dtodz * (dev_F_z[            kmo] - dev_F_z[            id]);
-    dev_conserved_half[  n_cells + id] = dev_conserved[  n_cells + id]
-                                       + dtodx * (dev_F_x[  n_cells + imo] - dev_F_x[  n_cells + id])
-                                       + dtody * (dev_F_y[  n_cells + jmo] - dev_F_y[  n_cells + id])
-                                       + dtodz * (dev_F_z[  n_cells + kmo] - dev_F_z[  n_cells + id]);
-    dev_conserved_half[2*n_cells + id] = dev_conserved[2*n_cells + id]
-                                       + dtodx * (dev_F_x[2*n_cells + imo] - dev_F_x[2*n_cells + id])
-                                       + dtody * (dev_F_y[2*n_cells + jmo] - dev_F_y[2*n_cells + id])
-                                       + dtodz * (dev_F_z[2*n_cells + kmo] - dev_F_z[2*n_cells + id]);
-    dev_conserved_half[3*n_cells + id] = dev_conserved[3*n_cells + id]
-                                       + dtodx * (dev_F_x[3*n_cells + imo] - dev_F_x[3*n_cells + id])
-                                       + dtody * (dev_F_y[3*n_cells + jmo] - dev_F_y[3*n_cells + id])
-                                       + dtodz * (dev_F_z[3*n_cells + kmo] - dev_F_z[3*n_cells + id]);
-    dev_conserved_half[4*n_cells + id] = dev_conserved[4*n_cells + id]
-                                       + dtodx * (dev_F_x[4*n_cells + imo] - dev_F_x[4*n_cells + id])
-                                       + dtody * (dev_F_y[4*n_cells + jmo] - dev_F_y[4*n_cells + id])
-                                       + dtodz * (dev_F_z[4*n_cells + kmo] - dev_F_z[4*n_cells + id]);
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_conserved_half[(5+i)*n_cells + id] = dev_conserved[(5+i)*n_cells + id]
-                                         + dtodx * (dev_F_x[(5+i)*n_cells + imo] - dev_F_x[(5+i)*n_cells + id])
-                                         + dtody * (dev_F_y[(5+i)*n_cells + jmo] - dev_F_y[(5+i)*n_cells + id])
-                                         + dtodz * (dev_F_z[(5+i)*n_cells + kmo] - dev_F_z[(5+i)*n_cells + id]);
+    dev_conserved_half[id] = dev_conserved[id] + dtodx * (dev_F_x[imo] - dev_F_x[id]) +
+                             dtody * (dev_F_y[jmo] - dev_F_y[id]) + dtodz * (dev_F_z[kmo] - dev_F_z[id]);
+    dev_conserved_half[n_cells + id] = dev_conserved[n_cells + id] +
+                                       dtodx * (dev_F_x[n_cells + imo] - dev_F_x[n_cells + id]) +
+                                       dtody * (dev_F_y[n_cells + jmo] - dev_F_y[n_cells + id]) +
+                                       dtodz * (dev_F_z[n_cells + kmo] - dev_F_z[n_cells + id]);
+    dev_conserved_half[2 * n_cells + id] = dev_conserved[2 * n_cells + id] +
+                                           dtodx * (dev_F_x[2 * n_cells + imo] - dev_F_x[2 * n_cells + id]) +
+                                           dtody * (dev_F_y[2 * n_cells + jmo] - dev_F_y[2 * n_cells + id]) +
+                                           dtodz * (dev_F_z[2 * n_cells + kmo] - dev_F_z[2 * n_cells + id]);
+    dev_conserved_half[3 * n_cells + id] = dev_conserved[3 * n_cells + id] +
+                                           dtodx * (dev_F_x[3 * n_cells + imo] - dev_F_x[3 * n_cells + id]) +
+                                           dtody * (dev_F_y[3 * n_cells + jmo] - dev_F_y[3 * n_cells + id]) +
+                                           dtodz * (dev_F_z[3 * n_cells + kmo] - dev_F_z[3 * n_cells + id]);
+    dev_conserved_half[4 * n_cells + id] = dev_conserved[4 * n_cells + id] +
+                                           dtodx * (dev_F_x[4 * n_cells + imo] - dev_F_x[4 * n_cells + id]) +
+                                           dtody * (dev_F_y[4 * n_cells + jmo] - dev_F_y[4 * n_cells + id]) +
+                                           dtodz * (dev_F_z[4 * n_cells + kmo] - dev_F_z[4 * n_cells + id]);
+  #ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dev_conserved_half[(5 + i) * n_cells + id] =
+          dev_conserved[(5 + i) * n_cells + id] +
+          dtodx * (dev_F_x[(5 + i) * n_cells + imo] - dev_F_x[(5 + i) * n_cells + id]) +
+          dtody * (dev_F_y[(5 + i) * n_cells + jmo] - dev_F_y[(5 + i) * n_cells + id]) +
+          dtodz * (dev_F_z[(5 + i) * n_cells + kmo] - dev_F_z[(5 + i) * n_cells + id]);
     }
-    #endif
-    #ifdef DE
-    dev_conserved_half[(n_fields-1)*n_cells + id] = dev_conserved[(n_fields-1)*n_cells + id]
-                                       + dtodx * (dev_F_x[(n_fields-1)*n_cells + imo] - dev_F_x[(n_fields-1)*n_cells + id])
-                                       + dtody * (dev_F_y[(n_fields-1)*n_cells + jmo] - dev_F_y[(n_fields-1)*n_cells + id])
-                                       + dtodz * (dev_F_z[(n_fields-1)*n_cells + kmo] - dev_F_z[(n_fields-1)*n_cells + id])
-                                       + 0.5*P*(dtodx*(vx_imo-vx_ipo) + dtody*(vy_jmo-vy_jpo) + dtodz*(vz_kmo-vz_kpo));
-    #endif
-
-    #ifdef DENSITY_FLOOR
-    if ( dev_conserved_half[            id] < density_floor ){
-      dens_0 = dev_conserved_half[            id];
-      printf("###Thread density change  %f -> %f \n", dens_0, density_floor );
-      dev_conserved_half[            id] = density_floor;
+  #endif  // SCALAR
+  #ifdef DE
+    dev_conserved_half[(n_fields - 1) * n_cells + id] =
+        dev_conserved[(n_fields - 1) * n_cells + id] +
+        dtodx * (dev_F_x[(n_fields - 1) * n_cells + imo] - dev_F_x[(n_fields - 1) * n_cells + id]) +
+        dtody * (dev_F_y[(n_fields - 1) * n_cells + jmo] - dev_F_y[(n_fields - 1) * n_cells + id]) +
+        dtodz * (dev_F_z[(n_fields - 1) * n_cells + kmo] - dev_F_z[(n_fields - 1) * n_cells + id]) +
+        0.5 * P * (dtodx * (vx_imo - vx_ipo) + dtody * (vy_jmo - vy_jpo) + dtodz * (vz_kmo - vz_kpo));
+  #endif  // DE
+  #ifdef DENSITY_FLOOR
+    if (dev_conserved_half[id] < density_floor) {
+      Real dens_0 = dev_conserved_half[id];
+      printf("###Thread density change  %f -> %f \n", dens_0, density_floor);
+      dev_conserved_half[id] = density_floor;
       // Scale the conserved values to the new density
-      dev_conserved_half[1*n_cells + id] *= (density_floor / dens_0);
-      dev_conserved_half[2*n_cells + id] *= (density_floor / dens_0);
-      dev_conserved_half[3*n_cells + id] *= (density_floor / dens_0);
-      dev_conserved_half[4*n_cells + id] *= (density_floor / dens_0);
-      #ifdef DE
-      dev_conserved_half[(n_fields-1)*n_cells + id] *= (density_floor / dens_0);
-      #endif
+      dev_conserved_half[1 * n_cells + id] *= (density_floor / dens_0);
+      dev_conserved_half[2 * n_cells + id] *= (density_floor / dens_0);
+      dev_conserved_half[3 * n_cells + id] *= (density_floor / dens_0);
+      dev_conserved_half[4 * n_cells + id] *= (density_floor / dens_0);
+    #ifdef DE
+      dev_conserved_half[(n_fields - 1) * n_cells + id] *= (density_floor / dens_0);
+    #endif  // DE
     }
-    #endif
-    //if (dev_conserved_half[id] < 0.0 || dev_conserved_half[id] != dev_conserved_half[id] || dev_conserved_half[4*n_cells+id] < 0.0 || dev_conserved_half[4*n_cells+id] != dev_conserved_half[4*n_cells+id]) {
-      //printf("%3d %3d %3d Thread crashed in half step update. d: %e E: %e\n", xid, yid, zid, dev_conserved_half[id], dev_conserved_half[4*n_cells+id]);
-    //}
-
+  #endif  // DENSITY_FLOOR
   }
-
 }
 
-
-
-
-#endif //VL
-#endif //CUDA
+#endif  // VL
diff --git a/src/integrators/VL_3D_cuda.h b/src/integrators/VL_3D_cuda.h
index 0d28710ab..4b80a4604 100644
--- a/src/integrators/VL_3D_cuda.h
+++ b/src/integrators/VL_3D_cuda.h
@@ -1,20 +1,15 @@
 /*! \file VL_3D_cuda.h
  *  \brief Declarations for the cuda version of the 3D VL algorithm. */
 
-#ifdef CUDA
-
 #ifndef VL_3D_CUDA_H
 #define VL_3D_CUDA_H
 
 #include "../global/global.h"
 
-void VL_Algorithm_3D_CUDA(Real *d_conserved,  Real *d_grav_potential,
-  int nx, int ny, int nz, int x_off, int y_off,
-  int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound,
-  Real ybound, Real zbound, Real dt, int n_fields, Real density_floor,
-  Real U_floor, Real *host_grav_potential );
+void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off,
+                          int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound,
+                          Real dt, int n_fields, int custom_grav, Real density_floor, Real *host_grav_potential);
 
 void Free_Memory_VL_3D();
 
-#endif //VL_3D_CUDA_H
-#endif //CUDA
+#endif  // VL_3D_CUDA_H
diff --git a/src/integrators/simple_1D_cuda.cu b/src/integrators/simple_1D_cuda.cu
index c1f209f01..80f26021a 100644
--- a/src/integrators/simple_1D_cuda.cu
+++ b/src/integrators/simple_1D_cuda.cu
@@ -1,124 +1,126 @@
 /*! \file simple_1D_cuda.cu
  *  \brief Definitions of the 1D simple algorithm functions. */
 
-#ifdef CUDA
-
+#include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <math.h>
-#include "../utils/gpu.hpp"
+
 #include "../global/global.h"
 #include "../global/global_cuda.h"
 #include "../hydro/hydro_cuda.h"
 #include "../integrators/simple_1D_cuda.h"
+#include "../io/io.h"
 #include "../reconstruction/pcm_cuda.h"
-#include "../reconstruction/plmp_cuda.h"
 #include "../reconstruction/plmc_cuda.h"
-#include "../reconstruction/ppmp_cuda.h"
+#include "../reconstruction/plmp_cuda.h"
 #include "../reconstruction/ppmc_cuda.h"
+#include "../reconstruction/ppmp_cuda.h"
 #include "../riemann_solvers/exact_cuda.h"
-#include "../riemann_solvers/roe_cuda.h"
 #include "../riemann_solvers/hllc_cuda.h"
+#include "../riemann_solvers/roe_cuda.h"
 #include "../utils/error_handling.h"
-#include "../io/io.h"
-
-
+#include "../utils/gpu.hpp"
 
-void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt, int n_fields)
+void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt,
+                              int n_fields, int custom_grav)
 {
-  //Here, *dev_conserved contains the entire
-  //set of conserved variables on the grid
+  // Here, *dev_conserved contains the entire
+  // set of conserved variables on the grid
 
-  int n_cells = nx;
-  int ny = 1;
-  int nz = 1;
-  int ngrid = (n_cells + TPB - 1) / TPB;
+  int n_cells             = nx;
+  [[maybe_unused]] int ny = 1;
+  [[maybe_unused]] int nz = 1;
+  int ngrid               = (n_cells + TPB - 1) / TPB;
 
   // set the dimensions of the cuda grid
   dim3 dimGrid(ngrid, 1, 1);
   dim3 dimBlock(TPB, 1, 1);
 
-  if ( !memory_allocated ) {
-
+  if (!memory_allocated) {
     // allocate memory on the GPU
     dev_conserved = d_conserved;
-    //CudaSafeCall( cudaMalloc((void**)&dev_conserved, n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&Q_Lx, n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&Q_Rx, n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&F_x,   (n_fields)*n_cells*sizeof(Real)) );
-
-    // If memory is single allocated: memory_allocated becomes true and successive timesteps won't allocate memory.
-    // If the memory is not single allocated: memory_allocated remains Null and memory is allocated every timestep.
+    // GPU_Error_Check( cudaMalloc((void**)&dev_conserved,
+    // n_fields*n_cells*sizeof(Real)) );
+    GPU_Error_Check(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&F_x, (n_fields)*n_cells * sizeof(Real)));
+
+    // If memory is single allocated: memory_allocated becomes true and
+    // successive timesteps won't allocate memory. If the memory is not single
+    // allocated: memory_allocated remains Null and memory is allocated every
+    // timestep.
     memory_allocated = true;
   }
 
-  // Step 1: Do the reconstruction
-  #ifdef PCM
-  hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, gama, n_fields);
-  CudaCheckError();
-  #endif
-  #ifdef PLMP
-  hipLaunchKernelGGL(PLMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
-  CudaCheckError();
-  #endif
-  #ifdef PLMC
-  hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
-  CudaCheckError();
-  #endif
-  #ifdef PPMP
-  hipLaunchKernelGGL(PPMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
-  CudaCheckError();
-  #endif
-  #ifdef PPMC
-  hipLaunchKernelGGL(PPMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
-  CudaCheckError();
-  #endif
-
-
-  // Step 2: Calculate the fluxes
-  #ifdef EXACT
-  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
-  #endif
-  #ifdef ROE
-  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
-  #endif
-  #ifdef HLLC
-  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
-  #endif
-  CudaCheckError();
-
-  #ifdef DE
-  // Compute the divergence of Vel before updating the conserved array, this solves synchronization issues when adding this term on Update_Conserved_Variables
-  hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_1D, dimGrid, dimBlock, 0, 0,  dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, dx, dt, gama, n_fields );
-  #endif
-
+// Step 1: Do the reconstruction
+#ifdef PCM
+  hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, gama,
+                     n_fields);
+  GPU_Error_Check();
+#endif
+#ifdef PLMP
+  hipLaunchKernelGGL(PLMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama,
+                     0, n_fields);
+  GPU_Error_Check();
+#endif
+#ifdef PLMC
+  hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0,
+                     n_fields);
+  GPU_Error_Check();
+#endif
+#ifdef PPMP
+  hipLaunchKernelGGL(PPMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama,
+                     0, n_fields);
+  GPU_Error_Check();
+#endif
+#ifdef PPMC
+  hipLaunchKernelGGL(PPMC_CTU, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0);
+  GPU_Error_Check();
+#endif
+
+// Step 2: Calculate the fluxes
+#ifdef EXACT
+  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama,
+                     0, n_fields);
+#endif
+#ifdef ROE
+  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0,
+                     n_fields);
+#endif
+#ifdef HLLC
+  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0,
+                     n_fields);
+#endif
+  GPU_Error_Check();
+
+#ifdef DE
+  // Compute the divergence of Vel before updating the conserved array, this
+  // solves synchronization issues when adding this term on
+  // Update_Conserved_Variables
+  hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx,
+                     n_ghost, dx, dt, gama, n_fields);
+#endif
 
   // Step 3: Update the conserved variable array
-  hipLaunchKernelGGL(Update_Conserved_Variables_1D, dimGrid, dimBlock, 0, 0, dev_conserved, F_x, n_cells, x_off, n_ghost, dx, xbound, dt, gama, n_fields);
-  CudaCheckError();
-
+  hipLaunchKernelGGL(Update_Conserved_Variables_1D, dimGrid, dimBlock, 0, 0, dev_conserved, F_x, n_cells, x_off,
+                     n_ghost, dx, xbound, dt, gama, n_fields, custom_grav);
+  GPU_Error_Check();
 
-  // Synchronize the total and internal energy, if using dual-energy formalism
-  #ifdef DE
+// Synchronize the total and internal energy, if using dual-energy formalism
+#ifdef DE
   hipLaunchKernelGGL(Select_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, dev_conserved, nx, n_ghost, n_fields);
   hipLaunchKernelGGL(Sync_Energies_1D, dimGrid, dimBlock, 0, 0, dev_conserved, n_cells, n_ghost, gama, n_fields);
-  CudaCheckError();
-  #endif
-
+  GPU_Error_Check();
+#endif
 
   return;
-
 }
 
-void Free_Memory_Simple_1D() {
-
+void Free_Memory_Simple_1D()
+{
   // free the GPU memory
   cudaFree(dev_conserved);
   cudaFree(Q_Lx);
   cudaFree(Q_Rx);
   cudaFree(F_x);
-
 }
-
-
-#endif //CUDA
diff --git a/src/integrators/simple_1D_cuda.h b/src/integrators/simple_1D_cuda.h
index 6aba36059..82ccf0c29 100644
--- a/src/integrators/simple_1D_cuda.h
+++ b/src/integrators/simple_1D_cuda.h
@@ -1,16 +1,14 @@
 /*! \file simple_1D_cuda.h
  *  \brief Declarations for the 1D simple algorithm. */
 
-#ifdef CUDA
-
 #ifndef SIMPLE_1D_CUDA_H
 #define SIMPLE_1D_CUDA_H
 
 #include "../global/global.h"
 
-void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt, int n_fields);
+void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt,
+                              int n_fields, int custom_grav);
 
 void Free_Memory_Simple_1D();
 
-#endif //Simple_1D_CUDA_H
-#endif //CUDA
+#endif  // Simple_1D_CUDA_H
diff --git a/src/integrators/simple_2D_cuda.cu b/src/integrators/simple_2D_cuda.cu
index 87cd87e58..97d435c51 100644
--- a/src/integrators/simple_2D_cuda.cu
+++ b/src/integrators/simple_2D_cuda.cu
@@ -1,120 +1,133 @@
 /*! \file simple_2D_cuda.cu
  *  \brief Definitions of the cuda 2D simple algorithm functions. */
 
-#ifdef CUDA
-
-#include <stdio.h>
 #include <math.h>
-#include "../utils/gpu.hpp"
+#include <stdio.h>
+
 #include "../global/global.h"
 #include "../global/global_cuda.h"
 #include "../hydro/hydro_cuda.h"
 #include "../integrators/simple_2D_cuda.h"
 #include "../reconstruction/pcm_cuda.h"
-#include "../reconstruction/plmp_cuda.h"
 #include "../reconstruction/plmc_cuda.h"
-#include "../reconstruction/ppmp_cuda.h"
+#include "../reconstruction/plmp_cuda.h"
 #include "../reconstruction/ppmc_cuda.h"
+#include "../reconstruction/ppmp_cuda.h"
 #include "../riemann_solvers/exact_cuda.h"
-#include "../riemann_solvers/roe_cuda.h"
 #include "../riemann_solvers/hllc_cuda.h"
+#include "../riemann_solvers/roe_cuda.h"
+#include "../utils/gpu.hpp"
 
-
-
-void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real dt, int n_fields)
+void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy,
+                              Real xbound, Real ybound, Real dt, int n_fields, int custom_grav)
 {
-
-  //Here, *dev_conserved contains the entire
-  //set of conserved variables on the grid
-  //concatenated into a 1-d array
-  int n_cells = nx*ny;
-  int nz = 1;
-  int ngrid = (n_cells + TPB - 1) / TPB;
-
+  // Here, *dev_conserved contains the entire
+  // set of conserved variables on the grid
+  // concatenated into a 1-d array
+  int n_cells             = nx * ny;
+  [[maybe_unused]] int nz = 1;
+  int ngrid               = (n_cells + TPB - 1) / TPB;
 
   // set values for GPU kernels
   // number of blocks per 1D grid
   dim3 dim2dGrid(ngrid, 1, 1);
-  //number of threads per 1D block
+  // number of threads per 1D block
   dim3 dim1dBlock(TPB, 1, 1);
 
-  if ( !memory_allocated ) {
-
+  if (!memory_allocated) {
     // allocate memory on the GPU
     dev_conserved = d_conserved;
-    //CudaSafeCall( cudaMalloc((void**)&dev_conserved, n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&Q_Lx, n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&Q_Rx, n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&Q_Ly, n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&Q_Ry, n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&F_x,  n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&F_y,  n_fields*n_cells*sizeof(Real)) );
-
-    // If memory is single allocated: memory_allocated becomes true and successive timesteps won't allocate memory.
-    // If the memory is not single allocated: memory_allocated remains Null and memory is allocated every timestep.
+    // GPU_Error_Check( cudaMalloc((void**)&dev_conserved,
+    // n_fields*n_cells*sizeof(Real)) );
+    GPU_Error_Check(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&Q_Ly, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&Q_Ry, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&F_x, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&F_y, n_fields * n_cells * sizeof(Real)));
+
+    // If memory is single allocated: memory_allocated becomes true and
+    // successive timesteps won't allocate memory. If the memory is not single
+    // allocated: memory_allocated remains Null and memory is allocated every
+    // timestep.
     memory_allocated = true;
   }
 
-  // Step 1: Do the reconstruction
-  #ifdef PCM
-  hipLaunchKernelGGL(PCM_Reconstruction_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny, n_ghost, gama, n_fields);
-  #endif
-  #ifdef PLMP
-  hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
-  hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields);
-  #endif
-  #ifdef PLMC
-  hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
-  hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields);
-  #endif
-  #ifdef PPMP
-  hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
-  hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields);
-  #endif
-  #ifdef PPMC
-  hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
-  hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields);
-  #endif
-  CudaCheckError();
-
-
-  // Step 2: Calculate the fluxes
-  #ifdef EXACT
-  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
-  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
-  #endif
-  #ifdef ROE
-  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
-  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
-  #endif
-  #ifdef HLLC
-  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
-  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
-  #endif
-  CudaCheckError();
-
-  #ifdef DE
-  // Compute the divergence of Vel before updating the conserved array, this solves synchronization issues when adding this term on Update_Conserved_Variables
-  hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0,  dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny, n_ghost, dx, dy, dt, gama, n_fields );
-  #endif
+// Step 1: Do the reconstruction
+#ifdef PCM
+  hipLaunchKernelGGL(PCM_Reconstruction_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny,
+                     n_ghost, gama, n_fields);
+#endif
+#ifdef PLMP
+  hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt,
+                     gama, 0, n_fields);
+  hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt,
+                     gama, 1, n_fields);
+#endif
+#ifdef PLMC
+  hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0,
+                     n_fields);
+  hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1,
+                     n_fields);
+#endif
+#ifdef PPMP
+  hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt,
+                     gama, 0, n_fields);
+  hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt,
+                     gama, 1, n_fields);
+#endif
+#ifdef PPMC
+  hipLaunchKernelGGL(PPMC_CTU, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0);
+  hipLaunchKernelGGL(PPMC_CTU, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1);
+#endif
+  GPU_Error_Check();
+
+// Step 2: Calculate the fluxes
+#ifdef EXACT
+  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost,
+                     gama, 0, n_fields);
+  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost,
+                     gama, 1, n_fields);
+#endif
+#ifdef ROE
+  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama,
+                     0, n_fields);
+  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama,
+                     1, n_fields);
+#endif
+#ifdef HLLC
+  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost,
+                     gama, 0, n_fields);
+  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost,
+                     gama, 1, n_fields);
+#endif
+  GPU_Error_Check();
+
+#ifdef DE
+  // Compute the divergence of Vel before updating the conserved array, this
+  // solves synchronization issues when adding this term on
+  // Update_Conserved_Variables
+  hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx,
+                     Q_Ly, Q_Ry, nx, ny, n_ghost, dx, dy, dt, gama, n_fields);
+#endif
 
   // Step 3: Update the conserved variable array
-  hipLaunchKernelGGL(Update_Conserved_Variables_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, F_x, F_y, nx, ny, x_off, y_off, n_ghost, dx, dy, xbound, ybound, dt, gama, n_fields);
-  CudaCheckError();
+  hipLaunchKernelGGL(Update_Conserved_Variables_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, F_x, F_y, nx, ny, x_off,
+                     y_off, n_ghost, dx, dy, xbound, ybound, dt, gama, n_fields, custom_grav);
+  GPU_Error_Check();
 
-  // Synchronize the total and internal energy
-  #ifdef DE
+// Synchronize the total and internal energy
+#ifdef DE
   hipLaunchKernelGGL(Select_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, n_fields);
   hipLaunchKernelGGL(Sync_Energies_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, gama, n_fields);
-  CudaCheckError();
-  #endif
+  GPU_Error_Check();
+#endif
 
   return;
-
 }
 
-void Free_Memory_Simple_2D() {
-
+void Free_Memory_Simple_2D()
+{
   // free the GPU memory
   cudaFree(dev_conserved);
   cudaFree(Q_Lx);
@@ -123,8 +136,4 @@ void Free_Memory_Simple_2D() {
   cudaFree(Q_Ry);
   cudaFree(F_x);
   cudaFree(F_y);
-
 }
-
-#endif //CUDA
-
diff --git a/src/integrators/simple_2D_cuda.h b/src/integrators/simple_2D_cuda.h
index 7a531f952..a381c553a 100644
--- a/src/integrators/simple_2D_cuda.h
+++ b/src/integrators/simple_2D_cuda.h
@@ -1,16 +1,14 @@
 /*! \file simple_2D_cuda.h
  *  \brief Declarations for the cuda version of the 2D simple algorithm. */
 
-#ifdef CUDA
-
 #ifndef SIMPLE_2D_CUDA_H
 #define SIMPLE_2D_CUDA_H
 
 #include "../global/global.h"
 
-void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real dt, int n_fields);
+void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy,
+                              Real xbound, Real ybound, Real dt, int n_fields, int custom_grav);
 
 void Free_Memory_Simple_2D();
 
-#endif //SIMPLE_2D_CUDA_H
-#endif //CUDA
+#endif  // SIMPLE_2D_CUDA_H
diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu
index 1b854dea9..528eab04f 100644
--- a/src/integrators/simple_3D_cuda.cu
+++ b/src/integrators/simple_3D_cuda.cu
@@ -1,41 +1,37 @@
 /*! \file simple_3D_cuda.cu
  *  \brief Definitions of the cuda 3D simple algorithm functions. */
 
-#ifdef CUDA
 #ifdef SIMPLE
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include "../utils/gpu.hpp"
-#include "../global/global.h"
-#include "../global/global_cuda.h"
-#include "../hydro/hydro_cuda.h"
-#include "../integrators/simple_3D_cuda.h"
-#include "../reconstruction/pcm_cuda.h"
-#include "../reconstruction/plmp_cuda.h"
-#include "../reconstruction/plmc_cuda.h"
-#include "../reconstruction/ppmp_cuda.h"
-#include "../reconstruction/ppmc_cuda.h"
-#include "../riemann_solvers/exact_cuda.h"
-#include "../riemann_solvers/roe_cuda.h"
-#include "../riemann_solvers/hllc_cuda.h"
-#include "../io/io.h"
-#include "../riemann_solvers/hll_cuda.h"
-
-
-
-void Simple_Algorithm_3D_CUDA(Real *d_conserved,  Real *d_grav_potential,
-          int nx, int ny, int nz, int x_off, int y_off,
-          int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound,
-          Real ybound, Real zbound, Real dt, int n_fields, Real density_floor,
-          Real U_floor,  Real *host_grav_potential  )
+  #include <math.h>
+  #include <stdio.h>
+  #include <stdlib.h>
+
+  #include "../global/global.h"
+  #include "../global/global_cuda.h"
+  #include "../hydro/hydro_cuda.h"
+  #include "../integrators/simple_3D_cuda.h"
+  #include "../io/io.h"
+  #include "../reconstruction/pcm_cuda.h"
+  #include "../reconstruction/plmc_cuda.h"
+  #include "../reconstruction/plmp_cuda.h"
+  #include "../reconstruction/ppmc_cuda.h"
+  #include "../reconstruction/ppmp_cuda.h"
+  #include "../riemann_solvers/exact_cuda.h"
+  #include "../riemann_solvers/hll_cuda.h"
+  #include "../riemann_solvers/hllc_cuda.h"
+  #include "../riemann_solvers/roe_cuda.h"
+  #include "../utils/gpu.hpp"
+
+void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off,
+                              int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound,
+                              Real dt, int n_fields, int custom_grav, Real density_floor, Real *host_grav_potential)
 {
-  //Here, *dev_conserved contains the entire
-  //set of conserved variables on the grid
-  //concatenated into a 1-d array
-  int n_cells = nx*ny*nz;
-  int ngrid = (n_cells + TPB - 1) / TPB;
+  // Here, *dev_conserved contains the entire
+  // set of conserved variables on the grid
+  // concatenated into a 1-d array
+  int n_cells = nx * ny * nz;
+  int ngrid   = (n_cells + TPB - 1) / TPB;
 
   // set values for GPU kernels
   // number of blocks per 1D grid
@@ -43,125 +39,150 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved,  Real *d_grav_potential,
   //  number of threads per 1D block
   dim3 dim1dBlock(TPB, 1, 1);
 
-  //host_grav_potential is NULL if not using GRAVITY
+  // host_grav_potential is NULL if not using GRAVITY
   temp_potential = host_grav_potential;
 
-  if ( !memory_allocated ){
+  if (!memory_allocated) {
     size_t global_free, global_total;
-    CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) );
-    
+    GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total));
+
     // allocate memory on the GPU
-    chprintf( " Allocating Hydro Memory: nfields: %d   n_cells: %d   nx: %d  ny: %d  nz: %d \n", n_fields, n_cells, nx, ny, nz );
-    chprintf( " Memory needed: %f GB    Free: %f GB    Total:  %f GB  \n", n_fields*n_cells*sizeof(Real)/1e9, global_free/1e9, global_total/1e9  );
+    chprintf(
+        " Allocating Hydro Memory: nfields: %d   n_cells: %d   nx: %d  ny: %d  "
+        "nz: %d \n",
+        n_fields, n_cells, nx, ny, nz);
+    chprintf(" Memory needed: %f GB    Free: %f GB    Total:  %f GB  \n", n_fields * n_cells * sizeof(Real) / 1e9,
+             global_free / 1e9, global_total / 1e9);
     dev_conserved = d_conserved;
-    CudaSafeCall( cudaMalloc((void**)&Q_Lx,  n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&Q_Rx,  n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&Q_Ly,  n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&Q_Ry,  n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&Q_Lz,  n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&Q_Rz,  n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&F_x,   n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&F_y,   n_fields*n_cells*sizeof(Real)) );
-    CudaSafeCall( cudaMalloc((void**)&F_z,   n_fields*n_cells*sizeof(Real)) );
-
-    #if defined( GRAVITY )
-    // CudaSafeCall( cudaMalloc((void**)&dev_grav_potential, n_cells*sizeof(Real)) );
+    GPU_Error_Check(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&Q_Ly, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&Q_Ry, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&Q_Lz, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&Q_Rz, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&F_x, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&F_y, n_fields * n_cells * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc((void **)&F_z, n_fields * n_cells * sizeof(Real)));
+
+  #if defined(GRAVITY)
+    // GPU_Error_Check( cudaMalloc((void**)&dev_grav_potential,
+    // n_cells*sizeof(Real)) );
     dev_grav_potential = d_grav_potential;
-    #else
+  #else
     dev_grav_potential = NULL;
-    #endif
+  #endif
 
-    // If memory is single allocated: memory_allocated becomes true and successive timesteps won't allocate memory.
-    // If the memory is not single allocated: memory_allocated remains Null and memory is allocated every timestep.
+    // If memory is single allocated: memory_allocated becomes true and
+    // successive timesteps won't allocate memory. If the memory is not single
+    // allocated: memory_allocated remains Null and memory is allocated every
+    // timestep.
     memory_allocated = true;
-    chprintf( " Memory allocated \n"  );
-
+    chprintf(" Memory allocated \n");
   }
 
-  #if defined( GRAVITY ) && !defined( GRAVITY_GPU )
-  CudaSafeCall( cudaMemcpy(dev_grav_potential, temp_potential, n_cells*sizeof(Real), cudaMemcpyHostToDevice) );
+  #if defined(GRAVITY) && !defined(GRAVITY_GPU)
+  GPU_Error_Check(cudaMemcpy(dev_grav_potential, temp_potential, n_cells * sizeof(Real), cudaMemcpyHostToDevice));
   #endif
 
-  
-  // Step 1: Construct left and right interface values using updated conserved variables
+  // Step 1: Construct left and right interface values using updated conserved
+  // variables
   #ifdef PCM
-  hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, gama, n_fields);
+  hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz,
+                     Q_Rz, nx, ny, nz, n_ghost, gama, n_fields);
   #endif
   #ifdef PLMP
-  hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
-  hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields);
-  hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields);
-  #endif //PLMP
+  hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt,
+                     gama, 0, n_fields);
+  hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt,
+                     gama, 1, n_fields);
+  hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt,
+                     gama, 2, n_fields);
+  #endif  // PLMP
   #ifdef PLMC
-  hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
-  hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields);
-  hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields);
+  hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0,
+                     n_fields);
+  hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1,
+                     n_fields);
+  hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, 2,
+                     n_fields);
   #endif
   #ifdef PPMP
-  hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
-  hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields);
-  hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields);
-  #endif //PPMP
+  hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt,
+                     gama, 0, n_fields);
+  hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt,
+                     gama, 1, n_fields);
+  hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt,
+                     gama, 2, n_fields);
+  #endif  // PPMP
   #ifdef PPMC
-  hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields);
-  hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields);
-  hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields);
-  CudaCheckError();
-  #endif //PPMC
-  
-  
+  hipLaunchKernelGGL(PPMC_CTU, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0);
+  hipLaunchKernelGGL(PPMC_CTU, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1);
+  hipLaunchKernelGGL(PPMC_CTU, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, 2);
+  GPU_Error_Check();
+  #endif  // PPMC
+
   // Step 2: Calculate the fluxes
   #ifdef EXACT
-  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
-  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
-  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields);
-  #endif //EXACT
+  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost,
+                     gama, 0, n_fields);
+  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost,
+                     gama, 1, n_fields);
+  hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost,
+                     gama, 2, n_fields);
+  #endif  // EXACT
   #ifdef ROE
-  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
-  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
-  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields);
-  #endif //ROE
+  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama,
+                     0, n_fields);
+  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama,
+                     1, n_fields);
+  hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama,
+                     2, n_fields);
+  #endif  // ROE
   #ifdef HLLC
-  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
-  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
-  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields);
-  #endif //HLLC
+  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost,
+                     gama, 0, n_fields);
+  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost,
+                     gama, 1, n_fields);
+  hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost,
+                     gama, 2, n_fields);
+  #endif  // HLLC
   #ifdef HLL
-  hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields);
-  hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields);
-  hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields);
-  #endif //HLL
-  CudaCheckError();
-  
+  hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama,
+                     0, n_fields);
+  hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama,
+                     1, n_fields);
+  hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama,
+                     2, n_fields);
+  #endif  // HLL
+  GPU_Error_Check();
+
   #ifdef DE
-  // Compute the divergence of Vel before updating the conserved array, this solves synchronization issues when adding this term on Update_Conserved_Variables_3D
-  hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0,  dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dx, dy, dz,  dt, gama, n_fields );
-  CudaCheckError();
+  // Compute the divergence of Vel before updating the conserved array, this
+  // solves synchronization issues when adding this term on
+  // Update_Conserved_Variables_3D
+  hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx,
+                     Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dx, dy, dz, dt, gama, n_fields);
+  GPU_Error_Check();
   #endif
-  
+
   // Step 3: Update the conserved variable array
-  hipLaunchKernelGGL(Update_Conserved_Variables_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved,  Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, F_x, F_y, F_z, nx, ny, nz, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, zbound, dt, gama, n_fields, density_floor, dev_grav_potential);
-  CudaCheckError();
-  
+  hipLaunchKernelGGL(Update_Conserved_Variables_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry,
+                     Q_Lz, Q_Rz, F_x, F_y, F_z, nx, ny, nz, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound,
+                     zbound, dt, gama, n_fields, custom_grav, density_floor, dev_grav_potential);
+  GPU_Error_Check();
+
   #ifdef DE
-  hipLaunchKernelGGL(Select_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields);
+  hipLaunchKernelGGL(Select_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost,
+                     n_fields);
   hipLaunchKernelGGL(Sync_Energies_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, gama, n_fields);
-  CudaCheckError();
+  GPU_Error_Check();
   #endif
-  
-  #ifdef TEMPERATURE_FLOOR
-  hipLaunchKernelGGL(Apply_Temperature_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, U_floor );
-  CudaCheckError();
-  #endif //TEMPERATURE_FLOOR
-
 
   return;
-
 }
 
-
-void Free_Memory_Simple_3D(){
-
+void Free_Memory_Simple_3D()
+{
   // free the GPU memory
   cudaFree(dev_conserved);
   cudaFree(Q_Lx);
@@ -173,11 +194,6 @@ void Free_Memory_Simple_3D(){
   cudaFree(F_x);
   cudaFree(F_y);
   cudaFree(F_z);
-
 }
 
-
-
-
-#endif //SIMPLE
-#endif //CUDA
+#endif  // SIMPLE
diff --git a/src/integrators/simple_3D_cuda.h b/src/integrators/simple_3D_cuda.h
index 9c904d2e7..847b93c61 100644
--- a/src/integrators/simple_3D_cuda.h
+++ b/src/integrators/simple_3D_cuda.h
@@ -1,21 +1,16 @@
 /*! \file simple_3D_cuda.h
  *  \brief Declarations for the cuda version of the 3D simple algorithm. */
 
-#ifdef CUDA
-
 #ifndef SIMPLE_3D_CUDA_H
 #define SIMPLE_3D_CUDA_H
 
-#include"../global/global.h"
-#include"../chemistry_gpu/chemistry_gpu.h"
+#include "../chemistry_gpu/chemistry_gpu.h"
+#include "../global/global.h"
 
-void Simple_Algorithm_3D_CUDA(Real *d_conserved,  Real *d_grav_potential,
-  int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost,
-  Real dx, Real dy, Real dz, Real xbound,
-  Real ybound, Real zbound, Real dt, int n_fields, Real density_floor,
-  Real U_floor,  Real *host_grav_potential );
+void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off,
+                              int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound,
+                              Real dt, int n_fields, int custom_grav, Real density_floor, Real *host_grav_potential);
 
 void Free_Memory_Simple_3D();
 
-#endif //SIMPLE_3D_CUDA_H
-#endif //CUDA
+#endif  // SIMPLE_3D_CUDA_H
diff --git a/src/io/io.cpp b/src/io/io.cpp
index be0a1b9fa..536ede3c3 100644
--- a/src/io/io.cpp
+++ b/src/io/io.cpp
@@ -1,192 +1,217 @@
+#include <math.h>
+#include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <stdarg.h>
 #include <string.h>
-#include <iostream>
-#include <fstream>
-#include <math.h>
+
 #include <algorithm>
 #include <ctime>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <string>
 #ifdef HDF5
-#include <hdf5.h>
-#endif  //HDF5
-#include "../io/io.h"
+  #include <hdf5.h>
+#endif  // HDF5
 #include "../grid/grid3D.h"
+#include "../io/io.h"
+#include "../utils/cuda_utilities.h"
+#include "../utils/hydro_utilities.h"
+#include "../utils/mhd_utilities.h"
+#include "../utils/timing_functions.h"  // provides ScopedTimer
 #ifdef MPI_CHOLLA
-#include "../mpi/mpi_routines.h"
-#endif  //MPI_CHOLLA
-#include "../utils/error_handling.h"
+  #include "../mpi/mpi_routines.h"
+#endif  // MPI_CHOLLA
 #include "../utils/DeviceVector.h"
+#include "../utils/error_handling.h"
 
 #ifdef COSMOLOGY
-#include "../cosmology/cosmology.h"
-#endif  //COSMOLOGY
-
-using namespace std;
+  #include "../cosmology/cosmology.h"
+#endif  // COSMOLOGY
 
-//#define OUTPUT_ENERGY
-//#define OUTPUT_MOMENTUM
+// #define OUTPUT_ENERGY
+// #define OUTPUT_MOMENTUM
 
-/* function used to rotate points about an axis in 3D for the rotated projection output routine */
-void rotate_point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real *xp, Real *yp, Real *zp);
+/* function used to rotate points about an axis in 3D for the rotated projection
+ * output routine */
+void Rotate_Point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real *xp, Real *yp, Real *zp);
 
-void Create_Log_File( struct parameters P ){
+/* local function that designates whether we are using a root-process. It gives
+ * gives a sensible result regardless of whether we are using MPI */
+static inline bool Is_Root_Proc()
+{
+#ifdef MPI_CHOLLA
+  return procID == root;
+#else
+  return true;
+#endif
+}
 
-  #ifdef MPI_CHOLLA
-  if ( procID != 0 ) return;
-  #endif
+void Create_Log_File(struct Parameters P)
+{
+  if (not Is_Root_Proc()) {
+    return;
+  }
 
-  string file_name ( LOG_FILE_NAME );
-  chprintf( "\nCreating Log File: %s \n\n", file_name.c_str() );
+  std::string file_name(LOG_FILE_NAME);
+  chprintf("\nCreating Log File: %s \n\n", file_name.c_str());
 
   bool file_exists = false;
-  if (FILE *file = fopen(file_name.c_str(), "r")){
+  if (FILE *file = fopen(file_name.c_str(), "r")) {
     file_exists = true;
-    chprintf( "  File exists, appending values: %s \n\n", file_name.c_str() );
-    fclose( file );
+    chprintf("  File exists, appending values: %s \n\n", file_name.c_str());
+    fclose(file);
   }
 
   // current date/time based on current system
   time_t now = time(0);
   // convert now to string form
-  char* dt = ctime(&now);
+  char *dt = ctime(&now);
 
-  ofstream out_file;
-  out_file.open(file_name.c_str(), ios::app);
+  std::ofstream out_file;
+  out_file.open(file_name.c_str(), std::ios::app);
   out_file << "\n";
   out_file << "Run date: " << dt;
   out_file.close();
-
 }
 
-void Write_Message_To_Log_File( const char* message ){
-
-    #ifdef MPI_CHOLLA
-    if ( procID != 0 ) return;
-    #endif
-
+void Write_Message_To_Log_File(const char *message)
+{
+  if (not Is_Root_Proc()) {
+    return;
+  }
 
-    string file_name ( LOG_FILE_NAME );
-    ofstream out_file;
-    out_file.open(file_name.c_str(), ios::app);
-    out_file << message << endl;
-    out_file.close();
+  std::string file_name(LOG_FILE_NAME);
+  std::ofstream out_file;
+  out_file.open(file_name.c_str(), std::ios::app);
+  out_file << message << std::endl;
+  out_file.close();
 }
 
 /* Write Cholla Output Data */
-void WriteData(Grid3D &G, struct parameters P, int nfile)
+void Write_Data(Grid3D &G, struct Parameters P, int nfile)
 {
+  cudaMemcpy(G.C.density, G.C.device, G.H.n_fields * G.H.n_cells * sizeof(Real), cudaMemcpyDeviceToHost);
 
-  cudaMemcpy(G.C.density, G.C.device, G.H.n_fields*G.H.n_cells*sizeof(Real), cudaMemcpyDeviceToHost);
+  chprintf("\nSaving Snapshot: %d \n", nfile);
 
-  chprintf( "\nSaving Snapshot: %d \n", nfile );
+  // ensure the output-directory exists (try to create it if it doesn't exist)
+  // -> Aside: it would be nice to pass an FnameTemplate instance into each function that uses it,
+  //    rather than reconstructing it everywhere
+  Ensure_Dir_Exists(FnameTemplate(P).effective_output_dir_path(nfile));
 
-  #ifdef HDF5
+#ifdef HDF5
   // Initialize HDF5 interface
   H5open();
-  #endif
+#endif
 
-  #ifdef N_OUTPUT_COMPLETE
-  //If nfile is multiple of N_OUTPUT_COMPLETE then output all data
-  if ( nfile%N_OUTPUT_COMPLETE == 0 ){
+#ifdef N_OUTPUT_COMPLETE
+  // If nfile is multiple of N_OUTPUT_COMPLETE then output all data
+  if (nfile % N_OUTPUT_COMPLETE == 0) {
     G.H.Output_Complete_Data = true;
-    chprintf( " Writing all data ( Restart File ).\n");
-  }
-  else{
+    chprintf(" Writing all data ( Restart File ).\n");
+  } else {
     G.H.Output_Complete_Data = false;
   }
 
-  #else
-  //If NOT N_OUTPUT_COMPLETE: always output complete data
+#else
+  // If NOT N_OUTPUT_COMPLETE: always output complete data
   G.H.Output_Complete_Data = true;
-  #endif
+#endif
 
-  #ifdef COSMOLOGY
-  G.Change_Cosmological_Frame_Sytem( false );
-  #endif
+#ifdef COSMOLOGY
+  G.Change_Cosmological_Frame_Sytem(false);
+#endif
 
-  #ifndef ONLY_PARTICLES
+#ifndef ONLY_PARTICLES
   /*call the data output routine for Hydro data*/
-  if (nfile % P.n_hydro == 0) OutputData(G,P,nfile);
-  #endif
+  if (nfile % P.n_hydro == 0) {
+    Output_Data(G, P, nfile);
+  }
+#endif
 
-  // This function does other checks to make sure it is valid (3D only)
-  #ifdef HDF5
-  if (P.n_out_float32 && nfile % P.n_out_float32 == 0) OutputFloat32(G,P,nfile);
-  #endif
+// This function does other checks to make sure it is valid (3D only)
+#ifdef HDF5
+  if (P.n_out_float32 && nfile % P.n_out_float32 == 0) {
+    Output_Float32(G, P, nfile);
+  }
+#endif
 
-  #ifdef PROJECTION
-  if (nfile % P.n_projection == 0) OutputProjectedData(G,P,nfile);
-  #endif /*PROJECTION*/
+#ifdef PROJECTION
+  if (nfile % P.n_projection == 0) {
+    Output_Projected_Data(G, P, nfile);
+  }
+#endif /*PROJECTION*/
 
-  #ifdef ROTATED_PROJECTION
-  if (nfile % P.n_rotated_projection == 0) OutputRotatedProjectedData(G,P,nfile);
-  #endif /*ROTATED_PROJECTION*/
+#ifdef ROTATED_PROJECTION
+  if (nfile % P.n_rotated_projection == 0) {
+    Output_Rotated_Projected_Data(G, P, nfile);
+  }
+#endif /*ROTATED_PROJECTION*/
 
-  #ifdef SLICES
-  if (nfile % P.n_slice == 0) OutputSlices(G,P,nfile);
-  #endif /*SLICES*/
+#ifdef SLICES
+  if (nfile % P.n_slice == 0) {
+    Output_Slices(G, P, nfile);
+  }
+#endif /*SLICES*/
 
-  #ifdef PARTICLES
-  if (nfile % P.n_particle == 0) G.WriteData_Particles(P, nfile);
-  #endif
+#ifdef PARTICLES
+  if (nfile % P.n_particle == 0) {
+    G.WriteData_Particles(P, nfile);
+  }
+#endif
 
-  #ifdef COSMOLOGY
-  if ( G.H.OUTPUT_SCALE_FACOR || G.H.Output_Initial){
+#ifdef COSMOLOGY
+  if (G.H.OUTPUT_SCALE_FACOR || G.H.Output_Initial) {
     G.Cosmo.Set_Next_Scale_Output();
-    if ( !G.Cosmo.exit_now ){
-      chprintf( " Saved Snapshot: %d     z:%f   next_output: %f\n", nfile, G.Cosmo.current_z, 1/G.Cosmo.next_output - 1 );
+    if (!G.Cosmo.exit_now) {
+      chprintf(" Saved Snapshot: %d     z:%f   next_output: %f\n", nfile, G.Cosmo.current_z,
+               1 / G.Cosmo.next_output - 1);
       G.H.Output_Initial = false;
-    }
-    else{
-      chprintf( " Saved Snapshot: %d     z:%f   Exiting now\n", nfile, G.Cosmo.current_z );
+    } else {
+      chprintf(" Saved Snapshot: %d     z:%f   Exiting now\n", nfile, G.Cosmo.current_z);
     }
 
+  } else {
+    chprintf(" Saved Snapshot: %d     z:%f\n", nfile, G.Cosmo.current_z);
   }
-  else chprintf( " Saved Snapshot: %d     z:%f\n", nfile, G.Cosmo.current_z );
-  G.Change_Cosmological_Frame_Sytem( true );
-  chprintf( "\n" );
+  G.Change_Cosmological_Frame_Sytem(true);
+  chprintf("\n");
   G.H.Output_Now = false;
-  #endif
+#endif
 
-  #ifdef HDF5
+#ifdef HDF5
   // Cleanup HDF5
   H5close();
-  #endif
+#endif
 
-  #ifdef MPI_CHOLLA
+#if defined(GRAVITY) && defined(HDF5)
+  G.Grav.Write_Restart_HDF5(&P, nfile);
+#endif
+
+#ifdef MPI_CHOLLA
   MPI_Barrier(world);
-  #endif
+#endif
 }
 
-
 /* Output the grid data to file. */
-void OutputData(Grid3D &G, struct parameters P, int nfile)
+void Output_Data(Grid3D &G, struct Parameters P, int nfile)
 {
-  char filename[MAXLEN];
-  char timestep[20];
-
   // create the filename
-  strcpy(filename, P.outdir);
-  sprintf(timestep, "%d", nfile);
-  strcat(filename, timestep);
-  #if defined BINARY
-  strcat(filename, ".bin");
-  #elif defined HDF5
-  strcat(filename, ".h5");
-  #else
-  strcat(filename, ".txt");
-  if (G.H.nx*G.H.ny*G.H.nz > 1000) printf("Ascii outputs only recommended for small problems!\n");
-  #endif
-  #ifdef MPI_CHOLLA
-  sprintf(filename,"%s.%d",filename,procID);
-  #endif
+  std::string filename = FnameTemplate(P).format_fname(nfile, "");
+
+#if !defined(BINARY) && !defined(HDF5)
+  if (G.H.nx * G.H.ny * G.H.nz > 1000) printf("Ascii outputs only recommended for small problems!\n");
+#endif
 
-  // open the file for binary writes
-  #if defined BINARY
+// open the file for binary writes
+#if defined BINARY
   FILE *out;
-  out = fopen(filename, "w");
-  if(out == NULL) {printf("Error opening output file.\n"); exit(-1); }
+  out = fopen(filename.data(), "w");
+  if (out == NULL) {
+    printf("Error opening output file.\n");
+    exit(-1);
+  }
 
   // write the header to the output file
   G.Write_Header_Binary(out);
@@ -197,13 +222,13 @@ void OutputData(Grid3D &G, struct parameters P, int nfile)
   // close the output file
   fclose(out);
 
-  // create the file for hdf5 writes
-  #elif defined HDF5
-  hid_t   file_id; /* file identifier */
-  herr_t  status;
+// create the file for hdf5 writes
+#elif defined HDF5
+  hid_t file_id; /* file identifier */
+  herr_t status;
 
   // Create a new file using default properties.
-  file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
+  file_id = H5Fcreate(filename.data(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
 
   // Write the header (file attributes)
   G.Write_Header_HDF5(file_id);
@@ -214,13 +239,19 @@ void OutputData(Grid3D &G, struct parameters P, int nfile)
   // close the file
   status = H5Fclose(file_id);
 
-  if (status < 0) {printf("File write failed.\n"); exit(-1); }
+  if (status < 0) {
+    printf("File write failed.\n");
+    exit(-1);
+  }
 
-  #else
+#else
   // open the file for txt writes
   FILE *out;
-  out = fopen(filename, "w");
-  if(out == NULL) {printf("Error opening output file.\n"); exit(-1); }
+  out = fopen(filename.data(), "w");
+  if (out == NULL) {
+    printf("Error opening output file.\n");
+    exit(-1);
+  }
 
   // write the header to the output file
   G.Write_Header_Text(out);
@@ -230,12 +261,12 @@ void OutputData(Grid3D &G, struct parameters P, int nfile)
 
   // close the output file
   fclose(out);
-  #endif
+#endif
 }
 
-void OutputFloat32(Grid3D &G, struct parameters P, int nfile)
+void Output_Float32(Grid3D &G, struct Parameters P, int nfile)
 {
-  
+#ifdef HDF5
   Header H = G.H;
   // Do nothing in 1-D and 2-D case
   if (H.ny_real == 1) {
@@ -249,24 +280,15 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile)
     return;
   }
 
-  char filename[MAXLEN];
-  char timestep[20];
-
   // create the filename
-  sprintf(timestep, "%d", nfile);
-  strcpy(filename, P.outdir);
-  strcat(filename, timestep);
-  strcat(filename, ".float32.h5");
-  #ifdef MPI_CHOLLA
-  sprintf(filename,"%s.%d",filename,procID);
-  #endif
+  std::string filename = FnameTemplate(P).format_fname(nfile, ".float32");
 
   // create hdf5 file
-  hid_t   file_id; /* file identifier */
-  herr_t  status;
+  hid_t file_id; /* file identifier */
+  herr_t status;
 
   // Create a new file using default properties.
-  file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
+  file_id = H5Fcreate(filename.data(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
 
   // Write the header (file attributes)
   G.Write_Header_HDF5(file_id);
@@ -274,73 +296,98 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile)
   // write the conserved variables to the output file
 
   // 3-D Case
-  if (H.nx>1 && H.ny>1 && H.nz>1) {
+  if (H.nx > 1 && H.ny > 1 && H.nz > 1) {
     int nx_dset = H.nx_real;
     int ny_dset = H.ny_real;
     int nz_dset = H.nz_real;
     size_t buffer_size;
-    // Need a larger device buffer for MHD. In the future, if other fields need a larger device buffer, choose the maximum of the sizes.
-    // If the buffer is too large, it does not cause bugs (Oct 6 2022)
-#ifdef MHD
-    buffer_size = (nx_dset+1)*(ny_dset+1)*(nz_dset+1);
-#else
-    buffer_size = nx_dset*ny_dset*nz_dset;
-#endif
+    // Need a larger device buffer for MHD. In the future, if other fields need
+    // a larger device buffer, choose the maximum of the sizes. If the buffer is
+    // too large, it does not cause bugs (Oct 6 2022)
+  #ifdef MHD
+    buffer_size = (nx_dset + 1) * (ny_dset + 1) * (nz_dset + 1);
+  #else
+    buffer_size = nx_dset * ny_dset * nz_dset;
+  #endif  // MHD
 
-    // Using static DeviceVector here automatically allocates the buffer the first time it is needed
-    // It persists until program exit, and then calls Free upon destruction
+    // Using static DeviceVector here automatically allocates the buffer the
+    // first time it is needed It persists until program exit, and then calls
+    // Free upon destruction
     cuda_utilities::DeviceVector<float> static device_dataset_vector{buffer_size};
-    float* device_dataset_buffer = device_dataset_vector.data();
-    float* dataset_buffer = (float *) malloc(buffer_size*sizeof(float));
-
-    if (P.out_float32_density > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_density, "/density");
-    if (P.out_float32_momentum_x > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_x, "/momentum_x");
-    if (P.out_float32_momentum_y > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_y, "/momentum_y");
-    if (P.out_float32_momentum_z > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_z, "/momentum_z");
-    if (P.out_float32_Energy > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_Energy, "/Energy");
-#ifdef DE
-    if (P.out_float32_GasEnergy > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_GasEnergy, "/GasEnergy");
-#endif  //DE
-#ifdef MHD
-    if (P.out_float32_magnetic_x > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_x, "/magnetic_x");
-    if (P.out_float32_magnetic_y > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_y, "/magnetic_y");
-    if (P.out_float32_magnetic_z > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_z, "/magnetic_z");
-#endif
+    auto *dataset_buffer = (float *)malloc(buffer_size * sizeof(float));
 
+    if (P.out_float32_density > 0) {
+      Write_HDF5_Field_3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer,
+                          device_dataset_vector.data(), G.C.d_density, "/density");
+    }
+    if (P.out_float32_momentum_x > 0) {
+      Write_HDF5_Field_3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer,
+                          device_dataset_vector.data(), G.C.d_momentum_x, "/momentum_x");
+    }
+    if (P.out_float32_momentum_y > 0) {
+      Write_HDF5_Field_3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer,
+                          device_dataset_vector.data(), G.C.d_momentum_y, "/momentum_y");
+    }
+    if (P.out_float32_momentum_z > 0) {
+      Write_HDF5_Field_3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer,
+                          device_dataset_vector.data(), G.C.d_momentum_z, "/momentum_z");
+    }
+    if (P.out_float32_Energy > 0) {
+      Write_HDF5_Field_3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer,
+                          device_dataset_vector.data(), G.C.d_Energy, "/Energy");
+    }
+  #ifdef DE
+    if (P.out_float32_GasEnergy > 0) {
+      Write_HDF5_Field_3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer,
+                          device_dataset_vector.data(), G.C.d_GasEnergy, "/GasEnergy");
+    }
+  #endif  // DE
+  #ifdef MHD
 
-    free(dataset_buffer);
+    // TODO (by Alwin, for anyone) : Repair output format if needed and remove these chprintfs when appropriate
+    if (P.out_float32_magnetic_x > 0) {
+      chprintf("WARNING: MHD float-32 output has a different output format than float-64\n");
+      Write_HDF5_Field_3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer,
+                          device_dataset_vector.data(), G.C.d_magnetic_x, "/magnetic_x");
+    }
+    if (P.out_float32_magnetic_y > 0) {
+      chprintf("WARNING: MHD float-32 output has a different output format than float-64\n");
+      Write_HDF5_Field_3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer,
+                          device_dataset_vector.data(), G.C.d_magnetic_y, "/magnetic_y");
+    }
+    if (P.out_float32_magnetic_z > 0) {
+      chprintf("WARNING: MHD float-32 output has a different output format than float-64\n");
+      Write_HDF5_Field_3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer,
+                          device_dataset_vector.data(), G.C.d_magnetic_z, "/magnetic_z");
+    }
 
-    if (status < 0) {printf("File write failed.\n"); exit(-1); }
-  } // 3-D case
+  #endif  // MHD
 
-    // close the file
-  status = H5Fclose(file_id);
+    free(dataset_buffer);
 
+    if (status < 0) {
+      printf("File write failed.\n");
+      exit(-1);
+    }
+  }  // 3-D case
 
+  // close the file
+  status = H5Fclose(file_id);
+#endif  // HDF5
 }
 
-
 /* Output a projection of the grid data to file. */
-void OutputProjectedData(Grid3D &G, struct parameters P, int nfile)
+void Output_Projected_Data(Grid3D &G, struct Parameters P, int nfile)
 {
-  char filename[100];
-  char timestep[20];
-  #ifdef HDF5
-  hid_t   file_id;
-  herr_t  status;
+#ifdef HDF5
+  hid_t file_id;
+  herr_t status;
 
   // create the filename
-  strcpy(filename, P.outdir);
-  sprintf(timestep, "%d_proj", nfile);
-  strcat(filename,timestep);
-  strcat(filename,".h5");
-
-  #ifdef MPI_CHOLLA
-  sprintf(filename,"%s.%d",filename,procID);
-  #endif /*MPI_CHOLLA*/
+  std::string filename = FnameTemplate(P).format_fname(nfile, "_proj");
 
   // Create a new file
-  file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
+  file_id = H5Fcreate(filename.data(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
 
   // Write header (file attributes)
   G.Write_Header_HDF5(file_id);
@@ -352,50 +399,44 @@ void OutputProjectedData(Grid3D &G, struct parameters P, int nfile)
   status = H5Fclose(file_id);
 
   #ifdef MPI_CHOLLA
-  if (status < 0) {printf("OutputProjectedData: File write failed. ProcID: %d\n", procID); chexit(-1); }
+  if (status < 0) {
+    printf("Output_Projected_Data: File write failed. ProcID: %d\n", procID);
+    chexit(-1);
+  }
   #else
-  if (status < 0) {printf("OutputProjectedData: File write failed.\n"); exit(-1); }
+  if (status < 0) {
+    printf("Output_Projected_Data: File write failed.\n");
+    exit(-1);
+  }
   #endif
 
-  #else
-  printf("OutputProjected Data only defined for hdf5 writes.\n");
-  #endif //HDF5
+#else
+  printf("Output_Projected_Data only defined for hdf5 writes.\n");
+#endif  // HDF5
 }
 
-
 /* Output a rotated projection of the grid data to file. */
-void OutputRotatedProjectedData(Grid3D &G, struct parameters P, int nfile)
+void Output_Rotated_Projected_Data(Grid3D &G, struct Parameters P, int nfile)
 {
-  char filename[100];
-  char timestep[20];
-  #ifdef HDF5
-  hid_t   file_id;
-  herr_t  status;
+#ifdef HDF5
+  hid_t file_id;
+  herr_t status;
 
   // create the filename
-  strcpy(filename, P.outdir);
-  sprintf(timestep, "%d_rot_proj", nfile);
-  strcat(filename,timestep);
-  strcat(filename,".h5");
-
-  #ifdef MPI_CHOLLA
-  sprintf(filename,"%s.%d",filename,procID);
-  #endif /*MPI_CHOLLA*/
+  std::string filename = FnameTemplate(P).format_fname(nfile, "_rot_proj");
 
-  if(G.R.flag_delta==1)
-  {
-    //if flag_delta==1, then we are just outputting a
-    //bunch of rotations of the same snapshot
+  if (G.R.flag_delta == 1) {
+    // if flag_delta==1, then we are just outputting a
+    // bunch of rotations of the same snapshot
     int i_delta;
     char fname[200];
 
-    for(i_delta=0;i_delta<G.R.n_delta;i_delta++)
-    {
-      sprintf(fname,"%s.%d",filename,G.R.i_delta);
-      chprintf("Outputting rotated projection %s.\n",fname);
+    for (i_delta = 0; i_delta < G.R.n_delta; i_delta++) {
+      filename += "." + std::to_string(G.R.i_delta);
+      chprintf("Outputting rotated projection %s.\n", fname);
 
-      //determine delta about z by output index
-      G.R.delta = 2.0*M_PI*((double) i_delta)/((double) G.R.n_delta);
+      // determine delta about z by output index
+      G.R.delta = 2.0 * M_PI * ((double)i_delta) / ((double)G.R.n_delta);
 
       // Create a new file
       file_id = H5Fcreate(fname, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
@@ -408,25 +449,29 @@ void OutputRotatedProjectedData(Grid3D &G, struct parameters P, int nfile)
 
       // Close the file
       status = H5Fclose(file_id);
-      #ifdef MPI_CHOLLA
-      if (status < 0) {printf("OutputRotatedProjectedData: File write failed. ProcID: %d\n", procID); chexit(-1); }
-      #else
-      if (status < 0) {printf("OutputRotatedProjectedData: File write failed.\n"); exit(-1); }
-      #endif
+  #ifdef MPI_CHOLLA
+      if (status < 0) {
+        printf("Output_Rotated_Projected_Data: File write failed. ProcID: %d\n", procID);
+        chexit(-1);
+      }
+  #else
+      if (status < 0) {
+        printf("Output_Rotated_Projected_Data: File write failed.\n");
+        exit(-1);
+      }
+  #endif
 
-      //iterate G.R.i_delta
+      // iterate G.R.i_delta
       G.R.i_delta++;
     }
 
-  }
-  else if (G.R.flag_delta == 2) {
-
+  } else if (G.R.flag_delta == 2) {
     // case 2 -- outputting at a rotating delta
     // rotation rate given in the parameter file
-    G.R.delta = fmod(nfile*G.R.ddelta_dt*2.0*PI , (2.0*PI));
+    G.R.delta = fmod(nfile * G.R.ddelta_dt * 2.0 * M_PI, (2.0 * M_PI));
 
     // Create a new file
-    file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
+    file_id = H5Fcreate(filename.data(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
 
     // Write header (file attributes)
     G.Write_Header_Rotated_HDF5(file_id);
@@ -436,13 +481,11 @@ void OutputRotatedProjectedData(Grid3D &G, struct parameters P, int nfile)
 
     // Close the file
     status = H5Fclose(file_id);
-  }
-  else {
-
-    //case 0 -- just output at the delta given in the parameter file
+  } else {
+    // case 0 -- just output at the delta given in the parameter file
 
     // Create a new file
-    file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
+    file_id = H5Fcreate(filename.data(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
 
     // Write header (file attributes)
     G.Write_Header_Rotated_HDF5(file_id);
@@ -455,38 +498,34 @@ void OutputRotatedProjectedData(Grid3D &G, struct parameters P, int nfile)
   }
 
   #ifdef MPI_CHOLLA
-  if (status < 0) {printf("OutputRotatedProjectedData: File write failed. ProcID: %d\n", procID); chexit(-1); }
+  if (status < 0) {
+    printf("Output_Rotated_Projected_Data: File write failed. ProcID: %d\n", procID);
+    chexit(-1);
+  }
   #else
-  if (status < 0) {printf("OutputRotatedProjectedData: File write failed.\n"); exit(-1); }
+  if (status < 0) {
+    printf("Output_Rotated_Projected_Data: File write failed.\n");
+    exit(-1);
+  }
   #endif
 
-  #else
-  printf("OutputRotatedProjectedData only defined for HDF5 writes.\n");
-  #endif
+#else
+  printf("Output_Rotated_Projected_Data only defined for HDF5 writes.\n");
+#endif
 }
 
-
 /* Output xy, xz, and yz slices of the grid data. */
-void OutputSlices(Grid3D &G, struct parameters P, int nfile)
+void Output_Slices(Grid3D &G, struct Parameters P, int nfile)
 {
-  char filename[100];
-  char timestep[20];
-  #ifdef HDF5
-  hid_t   file_id;
-  herr_t  status;
+#ifdef HDF5
+  hid_t file_id;
+  herr_t status;
 
   // create the filename
-  strcpy(filename, P.outdir);
-  sprintf(timestep, "%d_slice", nfile);
-  strcat(filename,timestep);
-  strcat(filename,".h5");
-
-  #ifdef MPI_CHOLLA
-  sprintf(filename,"%s.%d",filename,procID);
-  #endif /*MPI_CHOLLA*/
+  std::string filename = FnameTemplate(P).format_fname(nfile, "_slice");
 
   // Create a new file
-  file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
+  file_id = H5Fcreate(filename.data(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
 
   // Write header (file attributes)
   G.Write_Header_HDF5(file_id);
@@ -498,13 +537,19 @@ void OutputSlices(Grid3D &G, struct parameters P, int nfile)
   status = H5Fclose(file_id);
 
   #ifdef MPI_CHOLLA
-  if (status < 0) {printf("OutputSlices: File write failed. ProcID: %d\n", procID); chexit(-1); }
-  #else  // MPI_CHOLLA is not defined
-  if (status < 0) {printf("OutputSlices: File write failed.\n"); exit(-1); }
+  if (status < 0) {
+    printf("Output_Slices: File write failed. ProcID: %d\n", procID);
+    chexit(-1);
+  }
+  #else   // MPI_CHOLLA is not defined
+  if (status < 0) {
+    printf("Output_Slices: File write failed.\n");
+    exit(-1);
+  }
   #endif  // MPI_CHOLLA
-  #else  // HDF5 is not defined
-  printf("OutputSlices only defined for hdf5 writes.\n");
-  #endif //HDF5
+#else     // HDF5 is not defined
+  printf("Output_Slices only defined for hdf5 writes.\n");
+#endif    // HDF5
 }
 
 /*! \fn void Write_Header_Text(FILE *fp)
@@ -522,14 +567,12 @@ void Grid3D::Write_Header_Text(FILE *fp)
   fprintf(fp, "t: %f\n", H.t);
 }
 
-
 /*! \fn void Write_Header_Binary(FILE *fp)
  *  \brief Write the relevant header info to a binary output file. */
 void Grid3D::Write_Header_Binary(FILE *fp)
 {
-
   // Write the header info to the output file
-  //fwrite(&H, sizeof(H), 1, fp);
+  // fwrite(&H, sizeof(H), 1, fp);
   fwrite(&H.n_cells, sizeof(int), 1, fp);
   fwrite(&H.n_ghost, sizeof(int), 1, fp);
   fwrite(&H.nx, sizeof(int), 1, fp);
@@ -554,20 +597,18 @@ void Grid3D::Write_Header_Binary(FILE *fp)
   fwrite(&H.dt, sizeof(Real), 1, fp);
   fwrite(&H.t_wall, sizeof(Real), 1, fp);
   fwrite(&H.n_step, sizeof(int), 1, fp);
-
 }
 
-
 #ifdef HDF5
 /*! \fn void Write_Header_HDF5(hid_t file_id)
  *  \brief Write the relevant header info to the HDF5 file. */
 void Grid3D::Write_Header_HDF5(hid_t file_id)
 {
-  hid_t     attribute_id, dataspace_id;
-  herr_t    status;
-  hsize_t   attr_dims;
-  int       int_data[3];
-  Real      Real_data[3];
+  hid_t attribute_id, dataspace_id;
+  herr_t status;
+  hsize_t attr_dims;
+  int int_data[3];
+  Real Real_data[3];
 
   // Single attributes first
   attr_dims = 1;
@@ -584,77 +625,51 @@ void Grid3D::Write_Header_HDF5(hid_t file_id)
   hid_t stringType = H5Tcopy(H5T_C_S1);
   H5Tset_size(stringType, H5T_VARIABLE);
 
-  attribute_id = H5Acreate(file_id, "Git Commit Hash", stringType, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  const char * gitHash = GIT_HASH;
-  status = H5Awrite(attribute_id, stringType, &gitHash);
+  attribute_id        = H5Acreate(file_id, "Git Commit Hash", stringType, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
+  const char *gitHash = GIT_HASH;
+  status              = H5Awrite(attribute_id, stringType, &gitHash);
+  H5Aclose(attribute_id);
+
+  attribute_id           = H5Acreate(file_id, "Macro Flags", stringType, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
+  const char *macroFlags = MACRO_FLAGS;
+  status                 = H5Awrite(attribute_id, stringType, &macroFlags);
   H5Aclose(attribute_id);
 
-  attribute_id = H5Acreate(file_id, "Macro Flags", stringType, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  const char * macroFlags = MACRO_FLAGS;
-  status = H5Awrite(attribute_id, stringType, &macroFlags);
+  // attribute to help yt differentiate cholla outputs from outputs produced by other codes
+  attribute_id         = H5Acreate(file_id, "cholla", stringType, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
+  const char *dummyStr = "";  // this doesn't really matter right now
+  status               = H5Awrite(attribute_id, stringType, &dummyStr);
   H5Aclose(attribute_id);
 
   // Numeric Attributes
-  attribute_id = H5Acreate(file_id, "t", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &H.t);
-  status = H5Aclose(attribute_id);
-  attribute_id = H5Acreate(file_id, "dt", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &H.dt);
-  status = H5Aclose(attribute_id);
-  attribute_id = H5Acreate(file_id, "n_step", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_INT, &H.n_step);
-  status = H5Aclose(attribute_id);
-  attribute_id = H5Acreate(file_id, "n_fields", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_INT, &H.n_fields);
-  status = H5Aclose(attribute_id);
-  double time_unit = TIME_UNIT;
-  attribute_id = H5Acreate(file_id, "time_unit", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &time_unit);
-  status = H5Aclose(attribute_id);
-  double length_unit = LENGTH_UNIT;
-  attribute_id = H5Acreate(file_id, "length_unit", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &length_unit);
-  status = H5Aclose(attribute_id);
-  double mass_unit = MASS_UNIT;
-  attribute_id = H5Acreate(file_id, "mass_unit", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &mass_unit);
-  status = H5Aclose(attribute_id);
+  status               = Write_HDF5_Attribute(file_id, dataspace_id, &H.t, "t");
+  status               = Write_HDF5_Attribute(file_id, dataspace_id, &H.dt, "dt");
+  status               = Write_HDF5_Attribute(file_id, dataspace_id, &H.n_step, "n_step");
+  status               = Write_HDF5_Attribute(file_id, dataspace_id, &H.n_fields, "n_fields");
+  double time_unit     = TIME_UNIT;
+  status               = Write_HDF5_Attribute(file_id, dataspace_id, &time_unit, "time_unit");
+  double length_unit   = LENGTH_UNIT;
+  status               = Write_HDF5_Attribute(file_id, dataspace_id, &length_unit, "length_unit");
+  double mass_unit     = MASS_UNIT;
+  status               = Write_HDF5_Attribute(file_id, dataspace_id, &mass_unit, "mass_unit");
   double velocity_unit = VELOCITY_UNIT;
-  attribute_id = H5Acreate(file_id, "velocity_unit", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &velocity_unit);
-  status = H5Aclose(attribute_id);
-  double density_unit = DENSITY_UNIT;
-  attribute_id = H5Acreate(file_id, "density_unit", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &density_unit);
-  status = H5Aclose(attribute_id);
-  double energy_unit = ENERGY_UNIT;
-  attribute_id = H5Acreate(file_id, "energy_unit", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &energy_unit);
-  status = H5Aclose(attribute_id);
+  status               = Write_HDF5_Attribute(file_id, dataspace_id, &velocity_unit, "velocity_unit");
+  double density_unit  = DENSITY_UNIT;
+  status               = Write_HDF5_Attribute(file_id, dataspace_id, &density_unit, "density_unit");
+  double energy_unit   = ENERGY_UNIT;
+  status               = Write_HDF5_Attribute(file_id, dataspace_id, &energy_unit, "energy_unit");
 
   #ifdef MHD
-    double magnetic_field_unit = MAGNETIC_FIELD_UNIT;
-    attribute_id = H5Acreate(file_id, "magnetic_field_unit", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &magnetic_field_unit);
-    status = H5Aclose(attribute_id);
-  #endif  //MHD
+  double magnetic_field_unit = MAGNETIC_FIELD_UNIT;
+  status                     = Write_HDF5_Attribute(file_id, dataspace_id, &magnetic_field_unit, "magnetic_field_unit");
+  #endif  // MHD
 
   #ifdef COSMOLOGY
-  attribute_id = H5Acreate(file_id, "H0", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.H0);
-  status = H5Aclose(attribute_id);
-  attribute_id = H5Acreate(file_id, "Omega_M", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_M);
-  status = H5Aclose(attribute_id);
-  attribute_id = H5Acreate(file_id, "Omega_L", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_L);
-  status = H5Aclose(attribute_id);
-  attribute_id = H5Acreate(file_id, "Current_z", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.current_z);
-  status = H5Aclose(attribute_id);
-  attribute_id = H5Acreate(file_id, "Current_a", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.current_a);
-  status = H5Aclose(attribute_id);
+  status = Write_HDF5_Attribute(file_id, dataspace_id, &Cosmo.H0, "H0");
+  status = Write_HDF5_Attribute(file_id, dataspace_id, &Cosmo.Omega_M, "Omega_M");
+  status = Write_HDF5_Attribute(file_id, dataspace_id, &Cosmo.Omega_L, "Omega_L");
+  status = Write_HDF5_Attribute(file_id, dataspace_id, &Cosmo.current_z, "Current_z");
+  status = Write_HDF5_Attribute(file_id, dataspace_id, &Cosmo.current_a, "Current_a");
   #endif
 
   // Close the dataspace
@@ -676,97 +691,61 @@ void Grid3D::Write_Header_HDF5(hid_t file_id)
   int_data[2] = nz_global;
   #endif
 
-  attribute_id = H5Acreate(file_id, "dims", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_INT, int_data);
-  status = H5Aclose(attribute_id);
-
-  #ifdef  MHD
-    for (size_t i = 0; i < 3; i++)
-    {
-      int_data[i]++;
-    }
-
-    attribute_id = H5Acreate(file_id, "magnetic_field_dims", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Awrite(attribute_id, H5T_NATIVE_INT, int_data);
-    status = H5Aclose(attribute_id);
-  #endif  //MHD
+  status = Write_HDF5_Attribute(file_id, dataspace_id, int_data, "dims");
 
   #ifdef MPI_CHOLLA
   int_data[0] = H.nx_real;
   int_data[1] = H.ny_real;
   int_data[2] = H.nz_real;
 
-  attribute_id = H5Acreate(file_id, "dims_local", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_INT, int_data);
-  status = H5Aclose(attribute_id);
-
-  #ifdef  MHD
-    int_data[0] = H.nx_real + 1;
-    int_data[1] = H.ny_real + 1;
-    int_data[2] = H.nz_real + 1;
-
-    attribute_id = H5Acreate(file_id, "magnetic_field_dims_local", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Awrite(attribute_id, H5T_NATIVE_INT, int_data);
-    status = H5Aclose(attribute_id);
-  #endif  //MHD
+  status = Write_HDF5_Attribute(file_id, dataspace_id, int_data, "dims_local");
 
   int_data[0] = nx_local_start;
   int_data[1] = ny_local_start;
   int_data[2] = nz_local_start;
 
-  attribute_id = H5Acreate(file_id, "offset", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_INT, int_data);
-  status = H5Aclose(attribute_id);
+  status = Write_HDF5_Attribute(file_id, dataspace_id, int_data, "offset");
 
   int_data[0] = nproc_x;
   int_data[1] = nproc_y;
   int_data[2] = nproc_z;
 
-  attribute_id = H5Acreate(file_id, "nprocs", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_INT, int_data);
-  status = H5Aclose(attribute_id);
+  status = Write_HDF5_Attribute(file_id, dataspace_id, int_data, "nprocs");
   #endif
 
   Real_data[0] = H.xbound;
   Real_data[1] = H.ybound;
   Real_data[2] = H.zbound;
 
-  attribute_id = H5Acreate(file_id, "bounds", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data);
-  status = H5Aclose(attribute_id);
+  status = Write_HDF5_Attribute(file_id, dataspace_id, Real_data, "bounds");
 
   Real_data[0] = H.xdglobal;
   Real_data[1] = H.ydglobal;
   Real_data[2] = H.zdglobal;
 
-  attribute_id = H5Acreate(file_id, "domain", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data);
-  status = H5Aclose(attribute_id);
+  status = Write_HDF5_Attribute(file_id, dataspace_id, Real_data, "domain");
 
   Real_data[0] = H.dx;
   Real_data[1] = H.dy;
   Real_data[2] = H.dz;
 
-  attribute_id = H5Acreate(file_id, "dx", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data);
-  status = H5Aclose(attribute_id);
+  status = Write_HDF5_Attribute(file_id, dataspace_id, Real_data, "dx");
 
   // Close the dataspace
   status = H5Sclose(dataspace_id);
-
 }
 
-
 /*! \fn void Write_Header_Rotated_HDF5(hid_t file_id)
- *  \brief Write the relevant header info to the HDF5 file for rotated projection. */
+ *  \brief Write the relevant header info to the HDF5 file for rotated
+ * projection. */
 void Grid3D::Write_Header_Rotated_HDF5(hid_t file_id)
 {
-  hid_t     attribute_id, dataspace_id;
-  herr_t    status;
-  hsize_t   attr_dims;
-  int       int_data[3];
-  Real      Real_data[3];
-  Real      delta, theta, phi;
+  hid_t attribute_id, dataspace_id;
+  herr_t status;
+  hsize_t attr_dims;
+  int int_data[3];
+  Real Real_data[3];
+  Real delta, theta, phi;
 
   #ifdef MPI_CHOLLA
   // determine the size of the projection to output for this subvolume
@@ -777,19 +756,20 @@ void Grid3D::Write_Header_Rotated_HDF5(hid_t file_id)
   R.nx_max = 0;
   R.nz_min = R.nz;
   R.nz_max = 0;
-  for (int i=0; i<2; i++) {
-    for (int j=0; j<2; j++) {
-      for (int k=0; k<2; k++) {
+  for (int i = 0; i < 2; i++) {
+    for (int j = 0; j < 2; j++) {
+      for (int k = 0; k < 2; k++) {
         // find the corners of this domain in the rotated position
-        Get_Position(H.n_ghost+i*(H.nx-2*H.n_ghost), H.n_ghost+j*(H.ny-2*H.n_ghost), H.n_ghost+k*(H.nz-2*H.n_ghost), &x, &y, &z);
+        Get_Position(H.n_ghost + i * (H.nx - 2 * H.n_ghost), H.n_ghost + j * (H.ny - 2 * H.n_ghost),
+                     H.n_ghost + k * (H.nz - 2 * H.n_ghost), &x, &y, &z);
         // rotate cell position
-        rotate_point(x, y, z, R.delta, R.phi, R.theta, &xp, &yp, &zp);
-        //find projected location
-        //assumes box centered at [0,0,0]
-        alpha = (R.nx*(xp+0.5*R.Lx)/R.Lx);
-        beta  = (R.nz*(zp+0.5*R.Lz)/R.Lz);
-        ix = (int) round(alpha);
-        iz = (int) round(beta);
+        Rotate_Point(x, y, z, R.delta, R.phi, R.theta, &xp, &yp, &zp);
+        // find projected location
+        // assumes box centered at [0,0,0]
+        alpha    = (R.nx * (xp + 0.5 * R.Lx) / R.Lx);
+        beta     = (R.nz * (zp + 0.5 * R.Lz) / R.Lz);
+        ix       = (int)round(alpha);
+        iz       = (int)round(beta);
         R.nx_min = std::min(ix, R.nx_min);
         R.nx_max = std::max(ix, R.nx_max);
         R.nz_min = std::min(iz, R.nz_min);
@@ -820,67 +800,37 @@ void Grid3D::Write_Header_Rotated_HDF5(hid_t file_id)
   hid_t stringType = H5Tcopy(H5T_C_S1);
   H5Tset_size(stringType, H5T_VARIABLE);
 
-  attribute_id = H5Acreate(file_id, "Git Commit Hash", stringType, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  const char * gitHash = GIT_HASH;
-  status = H5Awrite(attribute_id, stringType, &gitHash);
+  attribute_id        = H5Acreate(file_id, "Git Commit Hash", stringType, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
+  const char *gitHash = GIT_HASH;
+  status              = H5Awrite(attribute_id, stringType, &gitHash);
   H5Aclose(attribute_id);
 
-  attribute_id = H5Acreate(file_id, "Macro Flags", stringType, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  const char * macroFlags = MACRO_FLAGS;
-  status = H5Awrite(attribute_id, stringType, &macroFlags);
+  attribute_id           = H5Acreate(file_id, "Macro Flags", stringType, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
+  const char *macroFlags = MACRO_FLAGS;
+  status                 = H5Awrite(attribute_id, stringType, &macroFlags);
   H5Aclose(attribute_id);
 
   // Numeric Attributes
-  attribute_id = H5Acreate(file_id, "t", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &H.t);
-  status = H5Aclose(attribute_id);
-  attribute_id = H5Acreate(file_id, "dt", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &H.dt);
-  status = H5Aclose(attribute_id);
-  attribute_id = H5Acreate(file_id, "n_step", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_INT, &H.n_step);
-  status = H5Aclose(attribute_id);
-  attribute_id = H5Acreate(file_id, "n_fields", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_INT, &H.n_fields);
-  status = H5Aclose(attribute_id);
-
-  //Rotation data
-  attribute_id = H5Acreate(file_id, "nxr", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_INT, &R.nx);
-  status = H5Aclose(attribute_id);
-  attribute_id = H5Acreate(file_id, "nzr", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_INT, &R.nz);
-  status = H5Aclose(attribute_id);
-  attribute_id = H5Acreate(file_id, "nx_min", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_INT, &R.nx_min);
-  status = H5Aclose(attribute_id);
-  attribute_id = H5Acreate(file_id, "nz_min", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_INT, &R.nz_min);
-  status = H5Aclose(attribute_id);
-  attribute_id = H5Acreate(file_id, "nx_max", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_INT, &R.nx_max);
-  status = H5Aclose(attribute_id);
-  attribute_id = H5Acreate(file_id, "nz_max", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_INT, &R.nz_max);
-  status = H5Aclose(attribute_id);
-  delta = 180.*R.delta/M_PI;
-  attribute_id = H5Acreate(file_id, "delta", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &delta);
-  status = H5Aclose(attribute_id);
-  theta = 180.*R.theta/M_PI;
-  attribute_id = H5Acreate(file_id, "theta", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &theta);
-  status = H5Aclose(attribute_id);
-  phi = 180.*R.phi/M_PI;
-  attribute_id = H5Acreate(file_id, "phi", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &phi);
-  status = H5Aclose(attribute_id);
-  attribute_id = H5Acreate(file_id, "Lx", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &R.Lx);
-  status = H5Aclose(attribute_id);
-  attribute_id = H5Acreate(file_id, "Lz", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &R.Lz);
-  status = H5Aclose(attribute_id);
+  status = Write_HDF5_Attribute(file_id, dataspace_id, &H.t, "t");
+  status = Write_HDF5_Attribute(file_id, dataspace_id, &H.dt, "dt");
+  status = Write_HDF5_Attribute(file_id, dataspace_id, &H.n_step, "n_step");
+  status = Write_HDF5_Attribute(file_id, dataspace_id, &H.n_fields, "n_fields");
+
+  // Rotation data
+  status = Write_HDF5_Attribute(file_id, dataspace_id, &R.nx, "nxr");
+  status = Write_HDF5_Attribute(file_id, dataspace_id, &R.nz, "nzr");
+  status = Write_HDF5_Attribute(file_id, dataspace_id, &R.nx_min, "nx_min");
+  status = Write_HDF5_Attribute(file_id, dataspace_id, &R.nz_min, "nz_min");
+  status = Write_HDF5_Attribute(file_id, dataspace_id, &R.nx_max, "nx_max");
+  status = Write_HDF5_Attribute(file_id, dataspace_id, &R.nz_max, "nz_max");
+  delta  = 180. * R.delta / M_PI;
+  status = Write_HDF5_Attribute(file_id, dataspace_id, &delta, "delta");
+  theta  = 180. * R.theta / M_PI;
+  status = Write_HDF5_Attribute(file_id, dataspace_id, &theta, "theta");
+  phi    = 180. * R.phi / M_PI;
+  status = Write_HDF5_Attribute(file_id, dataspace_id, &phi, "phi");
+  status = Write_HDF5_Attribute(file_id, dataspace_id, &R.Lx, "Lx");
+  status = Write_HDF5_Attribute(file_id, dataspace_id, &R.Lz, "Lz");
   // Close the dataspace
   status = H5Sclose(dataspace_id);
 
@@ -900,59 +850,49 @@ void Grid3D::Write_Header_Rotated_HDF5(hid_t file_id)
   int_data[2] = nz_global;
   #endif
 
-  attribute_id = H5Acreate(file_id, "dims", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_INT, int_data);
-  status = H5Aclose(attribute_id);
+  status = Write_HDF5_Attribute(file_id, dataspace_id, int_data, "dims");
 
   #ifdef MPI_CHOLLA
   int_data[0] = H.nx_real;
   int_data[1] = H.ny_real;
   int_data[2] = H.nz_real;
 
-  attribute_id = H5Acreate(file_id, "dims_local", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_INT, int_data);
-  status = H5Aclose(attribute_id);
+  status = Write_HDF5_Attribute(file_id, dataspace_id, int_data, "dims_local");
 
   int_data[0] = nx_local_start;
   int_data[1] = ny_local_start;
   int_data[2] = nz_local_start;
 
-  attribute_id = H5Acreate(file_id, "offset", H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_INT, int_data);
-  status = H5Aclose(attribute_id);
+  status = Write_HDF5_Attribute(file_id, dataspace_id, int_data, "offset");
   #endif
 
   Real_data[0] = H.xbound;
   Real_data[1] = H.ybound;
   Real_data[2] = H.zbound;
 
-  attribute_id = H5Acreate(file_id, "bounds", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data);
-  status = H5Aclose(attribute_id);
+  status = Write_HDF5_Attribute(file_id, dataspace_id, Real_data, "bounds");
 
   Real_data[0] = H.xdglobal;
   Real_data[1] = H.ydglobal;
   Real_data[2] = H.zdglobal;
 
-  attribute_id = H5Acreate(file_id, "domain", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data);
-  status = H5Aclose(attribute_id);
+  status = Write_HDF5_Attribute(file_id, dataspace_id, Real_data, "domain");
 
   Real_data[0] = H.dx;
   Real_data[1] = H.dy;
   Real_data[2] = H.dz;
 
-  attribute_id = H5Acreate(file_id, "dx", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data);
-  status = H5Aclose(attribute_id);
+  status = Write_HDF5_Attribute(file_id, dataspace_id, Real_data, "dx");
 
   // Close the dataspace
   status = H5Sclose(dataspace_id);
 
-  chprintf("Outputting rotation data with delta = %e, theta = %e, phi = %e, Lx = %f, Lz = %f\n",R.delta,R.theta,R.phi,R.Lx,R.Lz);
-
+  chprintf(
+      "Outputting rotation data with delta = %e, theta = %e, phi = %e, Lx = "
+      "%f, Lz = %f\n",
+      R.delta, R.theta, R.phi, R.Lx, R.Lz);
 }
-#endif  //HDF5
+#endif  // HDF5
 
 /*! \fn void Write_Grid_Text(FILE *fp)
  *  \brief Write the conserved quantities to a text output file. */
@@ -963,115 +903,116 @@ void Grid3D::Write_Grid_Text(FILE *fp)
   // Write the conserved quantities to the output file
 
   // 1D case
-  if (H.nx>1 && H.ny==1 && H.nz==1) {
+  if (H.nx > 1 && H.ny == 1 && H.nz == 1) {
     fprintf(fp, "id\trho\tmx\tmy\tmz\tE");
-    #ifdef  MHD
-     fprintf(fp, "\tmagX\tmagY\tmagZ");
-    #endif  //MHD
-    #ifdef DE
+#ifdef MHD
+    fprintf(fp, "\tmagX\tmagY\tmagZ");
+#endif  // MHD
+#ifdef DE
     fprintf(fp, "\tge");
-    #endif
+#endif
     fprintf(fp, "\n");
-    for (i=H.n_ghost; i < H.nx-H.n_ghost; i++) {
+    for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) {
       id = i;
-      fprintf(fp, "%d\t%f\t%f\t%f\t%f\t%f", i-H.n_ghost, C.density[id], C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], C.Energy[id]);
-      #ifdef MHD
-        fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]);
-      #endif  //MHD
-      #ifdef DE
+      fprintf(fp, "%d\t%f\t%f\t%f\t%f\t%f", i - H.n_ghost, C.density[id], C.momentum_x[id], C.momentum_y[id],
+              C.momentum_z[id], C.Energy[id]);
+#ifdef MHD
+      fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]);
+#endif  // MHD
+#ifdef DE
       fprintf(fp, "\t%f", C.GasEnergy[id]);
-      #endif  //DE
+#endif  // DE
       fprintf(fp, "\n");
     }
-    #ifdef  MHD
-      // Save the last line of magnetic fields
-      id = H.nx-H.n_ghost;
-      fprintf(fp, "%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", id, C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]);
-      #ifdef DE
-        fprintf(fp, "\tNan");
-      #endif  //DE
-      fprintf(fp, "\n");
-    #endif  //MHD
+#ifdef MHD
+    // Save the last line of magnetic fields
+    id = H.nx - H.n_ghost;
+    fprintf(fp, "%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", id, C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]);
+  #ifdef DE
+    fprintf(fp, "\tNan");
+  #endif  // DE
+    fprintf(fp, "\n");
+#endif  // MHD
   }
 
   // 2D case
-  else if (H.nx>1 && H.ny>1 && H.nz==1) {
-
+  else if (H.nx > 1 && H.ny > 1 && H.nz == 1) {
     fprintf(fp, "idx\tidy\trho\tmx\tmy\tmz\tE");
-    #ifdef  MHD
-     fprintf(fp, "\tmagX\tmagY\tmagZ");
-    #endif  //MHD
-    #ifdef DE
+#ifdef MHD
+    fprintf(fp, "\tmagX\tmagY\tmagZ");
+#endif  // MHD
+#ifdef DE
     fprintf(fp, "\tge");
-    #endif
+#endif
     fprintf(fp, "\n");
-    for (i=H.n_ghost; i < H.nx-H.n_ghost; i++) {
-      for (j=H.n_ghost; j < H.ny-H.n_ghost; j++) {
-        id = i + j*H.nx;
-        fprintf(fp, "%d\t%d\t%f\t%f\t%f\t%f\t%f", i-H.n_ghost, j-H.n_ghost, C.density[id], C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], C.Energy[id]);
-        #ifdef MHD
-          fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]);
-        #endif  //MHD
-        #ifdef DE
+    for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) {
+      for (j = H.n_ghost; j < H.ny - H.n_ghost; j++) {
+        id = i + j * H.nx;
+        fprintf(fp, "%d\t%d\t%f\t%f\t%f\t%f\t%f", i - H.n_ghost, j - H.n_ghost, C.density[id], C.momentum_x[id],
+                C.momentum_y[id], C.momentum_z[id], C.Energy[id]);
+#ifdef MHD
+        fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]);
+#endif  // MHD
+#ifdef DE
         fprintf(fp, "\t%f", C.GasEnergy[id]);
-        #endif  //DE
+#endif  // DE
         fprintf(fp, "\n");
       }
-      #ifdef  MHD
-        // Save the last line of magnetic fields
-        id = i + (H.ny-H.n_ghost)*H.nx;
-        fprintf(fp, "%d\t%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", i-H.n_ghost, H.ny-2*H.n_ghost, C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]);
-        #ifdef DE
-          fprintf(fp, "\tNan");
-        #endif  //DE
-        fprintf(fp, "\n");
-      #endif  //MHD
-    }
-    #ifdef  MHD
+#ifdef MHD
       // Save the last line of magnetic fields
-      id = H.nx-H.n_ghost + (H.ny-H.n_ghost)*H.nx;
-      fprintf(fp, "%d\t%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", H.nx-2*H.n_ghost, H.ny-2*H.n_ghost, C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]);
-      #ifdef DE
-        fprintf(fp, "\tNan");
-      #endif  //DE
+      id = i + (H.ny - H.n_ghost) * H.nx;
+      fprintf(fp, "%d\t%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", i - H.n_ghost, H.ny - 2 * H.n_ghost, C.magnetic_x[id],
+              C.magnetic_y[id], C.magnetic_z[id]);
+  #ifdef DE
+      fprintf(fp, "\tNan");
+  #endif  // DE
       fprintf(fp, "\n");
-    #endif  //MHD
+#endif  // MHD
+    }
+#ifdef MHD
+    // Save the last line of magnetic fields
+    id = H.nx - H.n_ghost + (H.ny - H.n_ghost) * H.nx;
+    fprintf(fp, "%d\t%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", H.nx - 2 * H.n_ghost, H.ny - 2 * H.n_ghost,
+            C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]);
+  #ifdef DE
+    fprintf(fp, "\tNan");
+  #endif  // DE
+    fprintf(fp, "\n");
+#endif  // MHD
   }
 
   // 3D case
   else {
     fprintf(fp, "idx\tidy\tidz\trho\tmx\tmy\tmz\tE");
-    #ifdef DE
+#ifdef DE
     fprintf(fp, "\tge");
-    #endif
-    #ifdef  MHD
-     fprintf(fp, "\tmagX\tmagY\tmagZ");
-    #endif  //MHD
+#endif
+#ifdef MHD
+    fprintf(fp, "\tmagX\tmagY\tmagZ");
+#endif  // MHD
     fprintf(fp, "\n");
-    for (i=H.n_ghost-1; i < H.nx-H.n_ghost; i++) {
-      for (j=H.n_ghost-1; j < H.ny-H.n_ghost; j++) {
-        for (k=H.n_ghost-1; k < H.nz-H.n_ghost; k++) {
-          id = i + j*H.nx + k*H.nx*H.ny;
+    for (i = H.n_ghost - 1; i < H.nx - H.n_ghost; i++) {
+      for (j = H.n_ghost - 1; j < H.ny - H.n_ghost; j++) {
+        for (k = H.n_ghost - 1; k < H.nz - H.n_ghost; k++) {
+          id = i + j * H.nx + k * H.nx * H.ny;
 
           // Exclude the rightmost ghost cell on the "left" side for the hydro
           // variables
-          if ((i >= H.n_ghost) and (j >= H.n_ghost) and (k >= H.n_ghost))
-          {
-            fprintf(fp, "%d\t%d\t%d\t%f\t%f\t%f\t%f\t%f", i-H.n_ghost, j-H.n_ghost, k-H.n_ghost, C.density[id], C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], C.Energy[id]);
-            #ifdef DE
+          if ((i >= H.n_ghost) and (j >= H.n_ghost) and (k >= H.n_ghost)) {
+            fprintf(fp, "%d\t%d\t%d\t%f\t%f\t%f\t%f\t%f", i - H.n_ghost, j - H.n_ghost, k - H.n_ghost, C.density[id],
+                    C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], C.Energy[id]);
+#ifdef DE
             fprintf(fp, "\t%f", C.GasEnergy[id]);
-            #endif  //DE
-          }
-          else
-          {
-            fprintf(fp, "%d\t%d\t%d\tn/a\tn/a\tn/a\tn/a\tn/a", i-H.n_ghost, j-H.n_ghost, k-H.n_ghost);
-            #ifdef DE
-              fprintf(fp, "\tn/a");
-            #endif  //DE
+#endif  // DE
+          } else {
+            fprintf(fp, "%d\t%d\t%d\tn/a\tn/a\tn/a\tn/a\tn/a", i - H.n_ghost, j - H.n_ghost, k - H.n_ghost);
+#ifdef DE
+            fprintf(fp, "\tn/a");
+#endif  // DE
           }
-          #ifdef MHD
-            fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]);
-          #endif  //MHD
+#ifdef MHD
+          fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]);
+#endif  // MHD
           fprintf(fp, "\n");
         }
       }
@@ -1079,9 +1020,6 @@ void Grid3D::Write_Grid_Text(FILE *fp)
   }
 }
 
-
-
-
 /*! \fn void Write_Grid_Binary(FILE *fp)
  *  \brief Write the conserved quantities to a binary output file. */
 void Grid3D::Write_Grid_Binary(FILE *fp)
@@ -1091,174 +1029,320 @@ void Grid3D::Write_Grid_Binary(FILE *fp)
   // Write the conserved quantities to the output file
 
   // 1D case
-  if (H.nx>1 && H.ny==1 && H.nz==1) {
-
+  if (H.nx > 1 && H.ny == 1 && H.nz == 1) {
     id = H.n_ghost;
 
-    fwrite(&(C.density[id]),    sizeof(Real), H.nx_real, fp);
+    fwrite(&(C.density[id]), sizeof(Real), H.nx_real, fp);
     fwrite(&(C.momentum_x[id]), sizeof(Real), H.nx_real, fp);
     fwrite(&(C.momentum_y[id]), sizeof(Real), H.nx_real, fp);
     fwrite(&(C.momentum_z[id]), sizeof(Real), H.nx_real, fp);
-    fwrite(&(C.Energy[id]),     sizeof(Real), H.nx_real, fp);
-    #ifdef DE
-    fwrite(&(C.GasEnergy[id]),     sizeof(Real), H.nx_real, fp);
-    #endif //DE
+    fwrite(&(C.Energy[id]), sizeof(Real), H.nx_real, fp);
+#ifdef DE
+    fwrite(&(C.GasEnergy[id]), sizeof(Real), H.nx_real, fp);
+#endif  // DE
   }
 
   // 2D case
-  else if (H.nx>1 && H.ny>1 && H.nz==1) {
-
-    for (j=0; j<H.ny_real; j++) {
-      id = H.n_ghost + (j+H.n_ghost)*H.nx;
+  else if (H.nx > 1 && H.ny > 1 && H.nz == 1) {
+    for (j = 0; j < H.ny_real; j++) {
+      id = H.n_ghost + (j + H.n_ghost) * H.nx;
       fwrite(&(C.density[id]), sizeof(Real), H.nx_real, fp);
     }
-    for (j=0; j<H.ny_real; j++) {
-      id = H.n_ghost + (j+H.n_ghost)*H.nx;
+    for (j = 0; j < H.ny_real; j++) {
+      id = H.n_ghost + (j + H.n_ghost) * H.nx;
       fwrite(&(C.momentum_x[id]), sizeof(Real), H.nx_real, fp);
     }
-    for (j=0; j<H.ny_real; j++) {
-      id = H.n_ghost + (j+H.n_ghost)*H.nx;
+    for (j = 0; j < H.ny_real; j++) {
+      id = H.n_ghost + (j + H.n_ghost) * H.nx;
       fwrite(&(C.momentum_y[id]), sizeof(Real), H.nx_real, fp);
     }
-    for (j=0; j<H.ny_real; j++) {
-      id = H.n_ghost + (j+H.n_ghost)*H.nx;
+    for (j = 0; j < H.ny_real; j++) {
+      id = H.n_ghost + (j + H.n_ghost) * H.nx;
       fwrite(&(C.momentum_z[id]), sizeof(Real), H.nx_real, fp);
     }
-    for (j=0; j<H.ny_real; j++) {
-      id = H.n_ghost + (j+H.n_ghost)*H.nx;
+    for (j = 0; j < H.ny_real; j++) {
+      id = H.n_ghost + (j + H.n_ghost) * H.nx;
       fwrite(&(C.Energy[id]), sizeof(Real), H.nx_real, fp);
     }
-    #ifdef DE
-    for (j=0; j<H.ny_real; j++) {
-      id = H.n_ghost + (j+H.n_ghost)*H.nx;
+#ifdef DE
+    for (j = 0; j < H.ny_real; j++) {
+      id = H.n_ghost + (j + H.n_ghost) * H.nx;
       fwrite(&(C.GasEnergy[id]), sizeof(Real), H.nx_real, fp);
     }
-    #endif //DE
+#endif  // DE
 
   }
 
   // 3D case
   else {
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        id = H.n_ghost + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
+    for (k = 0; k < H.nz_real; k++) {
+      for (j = 0; j < H.ny_real; j++) {
+        id = H.n_ghost + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny;
         fwrite(&(C.density[id]), sizeof(Real), H.nx_real, fp);
       }
     }
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        id = H.n_ghost + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
+    for (k = 0; k < H.nz_real; k++) {
+      for (j = 0; j < H.ny_real; j++) {
+        id = H.n_ghost + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny;
         fwrite(&(C.momentum_x[id]), sizeof(Real), H.nx_real, fp);
       }
     }
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        id = H.n_ghost + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
+    for (k = 0; k < H.nz_real; k++) {
+      for (j = 0; j < H.ny_real; j++) {
+        id = H.n_ghost + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny;
         fwrite(&(C.momentum_y[id]), sizeof(Real), H.nx_real, fp);
       }
     }
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        id = H.n_ghost + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
+    for (k = 0; k < H.nz_real; k++) {
+      for (j = 0; j < H.ny_real; j++) {
+        id = H.n_ghost + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny;
         fwrite(&(C.momentum_z[id]), sizeof(Real), H.nx_real, fp);
       }
     }
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        id = H.n_ghost + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
+    for (k = 0; k < H.nz_real; k++) {
+      for (j = 0; j < H.ny_real; j++) {
+        id = H.n_ghost + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny;
         fwrite(&(C.Energy[id]), sizeof(Real), H.nx_real, fp);
       }
     }
-    #ifdef DE
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        id = H.n_ghost + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
+#ifdef DE
+    for (k = 0; k < H.nz_real; k++) {
+      for (j = 0; j < H.ny_real; j++) {
+        id = H.n_ghost + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny;
         fwrite(&(C.GasEnergy[id]), sizeof(Real), H.nx_real, fp);
       }
     }
-    #endif //DE
+#endif  // DE
   }
-
 }
 
+#ifdef HDF5
+herr_t Write_HDF5_Attribute(hid_t file_id, hid_t dataspace_id, double *attribute, const char *name)
+{
+  hid_t attribute_id = H5Acreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
+  herr_t status      = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, attribute);
+  status             = H5Aclose(attribute_id);
+  return status;
+}
 
+herr_t Write_HDF5_Attribute(hid_t file_id, hid_t dataspace_id, int *attribute, const char *name)
+{
+  hid_t attribute_id = H5Acreate(file_id, name, H5T_STD_I32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
+  herr_t status      = H5Awrite(attribute_id, H5T_NATIVE_INT, attribute);
+  status             = H5Aclose(attribute_id);
+  return status;
+}
 
+herr_t Read_HDF5_Dataset(hid_t file_id, double *dataset_buffer, const char *name)
+{
+  hid_t dataset_id = H5Dopen(file_id, name, H5P_DEFAULT);
+  herr_t status    = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
+  status           = H5Dclose(dataset_id);
+  return status;
+}
 
-#ifdef HDF5
+herr_t Read_HDF5_Dataset(hid_t file_id, float *dataset_buffer, const char *name)
+{
+  hid_t dataset_id = H5Dopen(file_id, name, H5P_DEFAULT);
+  herr_t status    = H5Dread(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
+  status           = H5Dclose(dataset_id);
+  return status;
+}
 
-// Helper function which uses the correct HDF5 arguments based on the type of dataset_buffer to avoid writing garbage
-herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double* dataset_buffer, const char* name)
+// Helper function which uses the correct HDF5 arguments based on the type of
+// dataset_buffer to avoid writing garbage
+herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double *dataset_buffer, const char *name)
 {
-  // Create a dataset id for density
+  // Create the dataset id
   hid_t dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  // Write the density array to file
+  // Write the array to file
   herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
   // Free the dataset id
   status = H5Dclose(dataset_id);
   return status;
 }
 
-herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, float* dataset_buffer, const char* name)
+herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, float *dataset_buffer, const char *name)
 {
-  // Create a dataset id for density
+  // Create the dataset id
   hid_t dataset_id = H5Dcreate(file_id, name, H5T_IEEE_F32BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  // Write the density array to file
+  // Write the array to file
   herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
   // Free the dataset id
   status = H5Dclose(dataset_id);
   return status;
 }
 
-
-void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, Real* dataset_buffer, Real* source, const char* name)
+void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, Real *dataset_buffer, Real *source,
+                             const char *name)
 {
   // Copy non-ghost source to Buffer
   int id = H.n_ghost;
-  memcpy(&dataset_buffer[0], &(source[id]), H.nx_real*sizeof(Real));
+  memcpy(&dataset_buffer[0], &(source[id]), H.nx_real * sizeof(Real));
   // Buffer write to HDF5 Dataset
-  herr_t status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name);
+  herr_t status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name);
 }
 
-void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, float* dataset_buffer, double* source, const char* name)
+void Write_HDF5_Field_1D_CPU(Header H, hid_t file_id, hid_t dataspace_id, float *dataset_buffer, double *source,
+                             const char *name)
 {
   // Copy non-ghost source to Buffer with conversion from double to float
   int i;
-  for (i=0; i<H.nx_real; i++) {
-    dataset_buffer[i] = (float) source[i+H.n_ghost];
+  for (i = 0; i < H.nx_real; i++) {
+    dataset_buffer[i] = (float)source[i + H.n_ghost];
   }
   // Buffer write to HDF5 Dataset
-  herr_t status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name);
+  herr_t status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name);
 }
 
-void Write_HDF5_Field_2D_CPU(Header H, hid_t file_id, hid_t dataspace_id, Real* dataset_buffer, Real* source, const char* name)
+void Write_HDF5_Field_2D_CPU(Header H, hid_t file_id, hid_t dataspace_id, Real *dataset_buffer, Real *source,
+                             const char *name)
 {
-  int i,j,id,buf_id;
+  int i, j, id, buf_id;
   // Copy non-ghost source to Buffer
-  for (j=0; j<H.ny_real; j++) {
-    for (i=0; i<H.nx_real; i++) {
-      id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx;
-      buf_id = j + i*H.ny_real;
+  for (j = 0; j < H.ny_real; j++) {
+    for (i = 0; i < H.nx_real; i++) {
+      id                     = (i + H.n_ghost) + (j + H.n_ghost) * H.nx;
+      buf_id                 = j + i * H.ny_real;
       dataset_buffer[buf_id] = source[id];
     }
   }
   // Buffer write to HDF5 Dataset
-  herr_t status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name);
+  herr_t status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name);
 }
 
 // Convert double to float if necessary
-void Write_HDF5_Field_2D_CPU(Header H, hid_t file_id, hid_t dataspace_id, float* dataset_buffer, double* source, const char* name)
+void Write_HDF5_Field_2D_CPU(Header H, hid_t file_id, hid_t dataspace_id, float *dataset_buffer, double *source,
+                             const char *name)
 {
-  int i,j,id,buf_id;
+  int i, j, id, buf_id;
   // Copy non-ghost source to Buffer with conversion to float
-  for (j=0; j<H.ny_real; j++) {
-    for (i=0; i<H.nx_real; i++) {
-      id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx;
-      buf_id = j + i*H.ny_real;
-      dataset_buffer[buf_id] = (float) source[id];
+  for (j = 0; j < H.ny_real; j++) {
+    for (i = 0; i < H.nx_real; i++) {
+      id                     = (i + H.n_ghost) + (j + H.n_ghost) * H.nx;
+      buf_id                 = j + i * H.ny_real;
+      dataset_buffer[buf_id] = (float)source[id];
     }
   }
   // Buffer write to HDF5 Dataset
-  herr_t status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name);
+  herr_t status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name);
+}
+
+/* \brief Before HDF5 reads data into a buffer, remap and write grid to HDF5 buffer. */
+void Fill_HDF5_Buffer_From_Grid_CPU(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost,
+                                    Real *hdf5_buffer, Real *grid_buffer)
+{
+  int i, j, k, id, buf_id;
+  // 3D case
+  if (nx > 1 && ny > 1 && nz > 1) {
+    for (k = 0; k < nz_real; k++) {
+      for (j = 0; j < ny_real; j++) {
+        for (i = 0; i < nx_real; i++) {
+          id                  = (i + n_ghost) + (j + n_ghost) * nx + (k + n_ghost) * nx * ny;
+          buf_id              = k + j * nz_real + i * nz_real * ny_real;
+          hdf5_buffer[buf_id] = grid_buffer[id];
+        }
+      }
+    }
+    return;
+  }
+
+  // 2D case
+  if (nx > 1 && ny > 1 && nz == 1) {
+    for (j = 0; j < ny_real; j++) {
+      for (i = 0; i < nx_real; i++) {
+        id                  = (i + n_ghost) + (j + n_ghost) * nx;
+        buf_id              = j + i * ny_real;
+        hdf5_buffer[buf_id] = grid_buffer[id];
+      }
+    }
+    return;
+  }
+
+  // 1D case
+  if (nx > 1 && ny == 1 && nz == 1) {
+    id = n_ghost;
+    memcpy(&hdf5_buffer[0], &grid_buffer[id], nx_real * sizeof(Real));
+    return;
+  }
+}
+
+/* \brief Before HDF5 reads data into a buffer, remap and write grid to HDF5 buffer. */
+void Fill_HDF5_Buffer_From_Grid_GPU(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost,
+                                    Real *hdf5_buffer, Real *device_hdf5_buffer, Real *device_grid_buffer);
+// From src/io/io_gpu
+
+// Set up dataspace for grid formatted data and write dataset
+void Write_HDF5_Dataset_Grid(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, hid_t file_id,
+                             Real *dataset_buffer, const char *name)
+{
+  // Set up dataspace
+
+  hid_t dataspace_id;
+  // 1-D Case
+  if (nx > 1 && ny == 1 && nz == 1) {
+    int rank = 1;
+    hsize_t dims[1];
+    dims[0]      = nx_real;
+    dataspace_id = H5Screate_simple(rank, dims, NULL);
+  }
+  // 2-D Case
+  if (nx > 1 && ny > 1 && nz == 1) {
+    int rank = 2;
+    hsize_t dims[2];
+    dims[0]      = nx_real;
+    dims[1]      = ny_real;
+    dataspace_id = H5Screate_simple(rank, dims, NULL);
+  }
+  // 3-D Case
+  if (nx > 1 && ny > 1 && nz > 1) {
+    int rank = 3;
+    hsize_t dims[3];
+    dims[0]      = nx_real;
+    dims[1]      = ny_real;
+    dims[2]      = nz_real;
+    dataspace_id = H5Screate_simple(rank, dims, NULL);
+  }
+
+  // Write to HDF5 file
+
+  Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name);
+
+  // Close dataspace
+  herr_t status = H5Sclose(dataspace_id);
+}
+
+// Data moves from host grid_buffer to dataset_buffer to hdf5 file
+void Write_Grid_HDF5_Field_CPU(Header H, hid_t file_id, Real *dataset_buffer, Real *grid_buffer, const char *name)
+{
+  Fill_HDF5_Buffer_From_Grid_CPU(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, H.n_ghost, dataset_buffer,
+                                 grid_buffer);
+  Write_HDF5_Dataset_Grid(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, file_id, dataset_buffer, name);
+}
+
+// Data moves from device_grid_buffer to device_hdf5_buffer to dataset_buffer to hdf5 file
+void Write_Grid_HDF5_Field_GPU(Header H, hid_t file_id, Real *dataset_buffer, Real *device_hdf5_buffer,
+                               Real *device_grid_buffer, const char *name)
+{
+  Fill_HDF5_Buffer_From_Grid_GPU(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, H.n_ghost, dataset_buffer,
+                                 device_hdf5_buffer, device_grid_buffer);
+  Write_HDF5_Dataset_Grid(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, file_id, dataset_buffer, name);
+}
+
+void Write_Generic_HDF5_Field_CPU(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost,
+                                  hid_t file_id, Real *dataset_buffer, Real *source_buffer, const char *name)
+{
+  Fill_HDF5_Buffer_From_Grid_CPU(nx, ny, nz, nx_real, ny_real, nz_real, n_ghost, dataset_buffer, source_buffer);
+  Write_HDF5_Dataset_Grid(nx, ny, nz, nx_real, ny_real, nz_real, file_id, dataset_buffer, name);
+}
+
+void Write_Generic_HDF5_Field_GPU(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost,
+                                  hid_t file_id, Real *dataset_buffer, Real *device_hdf5_buffer, Real *source_buffer,
+                                  const char *name)
+{
+  Fill_HDF5_Buffer_From_Grid_GPU(nx, ny, nz, nx_real, ny_real, nz_real, n_ghost, dataset_buffer, device_hdf5_buffer,
+                                 source_buffer);
+  Write_HDF5_Dataset_Grid(nx, ny, nz, nx_real, ny_real, nz_real, file_id, dataset_buffer, name);
 }
 
 /*! \fn void Write_Grid_HDF5(hid_t file_id)
@@ -1266,562 +1350,361 @@ void Write_HDF5_Field_2D_CPU(Header H, hid_t file_id, hid_t dataspace_id, float*
 void Grid3D::Write_Grid_HDF5(hid_t file_id)
 {
   int i, j, k, id, buf_id;
-  hid_t     dataset_id, dataspace_id;
-  hid_t     dataset_id_full, dataspace_id_full;
-  Real      *dataset_buffer;
-  herr_t    status;
+  hid_t dataset_id, dataspace_id;
+  hid_t dataset_id_full, dataspace_id_full;
+  Real *dataset_buffer;
+  herr_t status;
 
   bool output_energy;
   bool output_momentum;
 
-
   #ifdef OUTPUT_ENERGY
   output_energy = true;
-  #else  // not OUTPUT_ENERGY
+  #else   // not OUTPUT_ENERGY
   output_energy = false;
-  #endif  //OUTPUT_ENERGY
+  #endif  // OUTPUT_ENERGY
 
   #ifdef OUTPUT_MOMENTUM
   output_momentum = true;
-  #else  // not OUTPUT_MOMENTUM
+  #else   // not OUTPUT_MOMENTUM
   output_momentum = false;
-  #endif  //OUTPUT_MOMENTUM
+  #endif  // OUTPUT_MOMENTUM
 
   #if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU)
   bool output_metals, output_electrons, output_full_ionization;
-  #ifdef OUTPUT_METALS
+    #ifdef OUTPUT_METALS
   output_metals = true;
-  #else  // not OUTPUT_METALS
+    #else   // not OUTPUT_METALS
   output_metals = false;
-  #endif  //OUTPUT_METALS
-  #ifdef OUTPUT_ELECTRONS
+    #endif  // OUTPUT_METALS
+    #ifdef OUTPUT_ELECTRONS
   output_electrons = true;
-  #else  // not OUTPUT_ELECTRONS
+    #else   // not OUTPUT_ELECTRONS
   output_electrons = false;
-  #endif  //OUTPUT_ELECTRONS
-  #ifdef OUTPUT_FULL_IONIZATION
+    #endif  // OUTPUT_ELECTRONS
+    #ifdef OUTPUT_FULL_IONIZATION
   output_full_ionization = true;
-  #else  // not OUTPUT_FULL_IONIZATION
+    #else   // not OUTPUT_FULL_IONIZATION
   output_full_ionization = false;
-  #endif  //OUTPUT_FULL_IONIZATION
+    #endif  // OUTPUT_FULL_IONIZATION
 
-  #endif // COOLING_GRACKLE or CHEMISTRY_GPU
+  #endif  // COOLING_GRACKLE or CHEMISTRY_GPU
 
-  #if defined(GRAVITY_GPU) && defined(OUTPUT_POTENTIAL)
-  CudaSafeCall( cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, Grav.n_cells_potential*sizeof(Real), cudaMemcpyDeviceToHost) );
-  #endif//GRAVITY_GPU and OUTPUT_POTENTIAL
-
-
-
-  // 1D case
-  if (H.nx>1 && H.ny==1 && H.nz==1) {
-
-    int       nx_dset = H.nx_real;
-    hsize_t   dims[1];
-    dataset_buffer = (Real *) malloc(H.nx_real*sizeof(Real));
-
-    // Create the data space for the datasets
-    dims[0] = nx_dset;
-    dataspace_id = H5Screate_simple(1, dims, NULL);
-
-    Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.density, "/density");
-    Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_x, "/momentum_x");
-    Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_y, "/momentum_y");
-    Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_z, "/momentum_z");
-    Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.Energy, "/Energy");
-
-    #ifdef SCALAR
-    for (int s=0; s<NSCALARS; s++) {
-      // create the name of the dataset
-      char dataset[100];
-      char number[10];
-      strcpy(dataset, "/scalar");
-      sprintf(number, "%d", s);
-      strcat(dataset,number);
-      // Copy the scalar array to the memory buffer
-
-      // TODO: If there is a test case for regression testing NSCALARS > 1 this substitution can be attempted.
-      // Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset);
-
-      id = H.n_ghost;
-      memcpy(&dataset_buffer[0], &(C.scalar[id+s*H.n_cells]), H.nx_real*sizeof(Real));
-      // dataset here is just a name
-      status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset);
-    }
-
-    #endif  //SCALAR
+  // Allocate necessary buffers
+  int nx_dset = H.nx_real;
+  int ny_dset = H.ny_real;
+  int nz_dset = H.nz_real;
+  #ifdef MHD
+  size_t buffer_size = (nx_dset + 1) * (ny_dset + 1) * (nz_dset + 1);
+  #else
+  size_t buffer_size = nx_dset * ny_dset * nz_dset;
+  #endif
+  cuda_utilities::DeviceVector<Real> static device_dataset_vector{buffer_size};
+  dataset_buffer = (Real *)malloc(buffer_size * sizeof(Real));
 
-    #ifdef DE
-    Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.GasEnergy, "/GasEnergy");
-    #endif  //DE
+  // Start writing fields
 
-    // Free the dataspace id
-    status = H5Sclose(dataspace_id);
+  Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_density, "/density");
+  if (output_momentum || H.Output_Complete_Data) {
+    Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_momentum_x, "/momentum_x");
+    Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_momentum_y, "/momentum_y");
+    Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_momentum_z, "/momentum_z");
   }
-
-
-  // 2D case
-  if (H.nx>1 && H.ny>1 && H.nz==1) {
-
-    int       nx_dset = H.nx_real;
-    int       ny_dset = H.ny_real;
-    hsize_t   dims[2];
-    dataset_buffer = (Real *) malloc(H.ny_real*H.nx_real*sizeof(Real));
-
-    // Create the data space for the datasets
-    dims[0] = nx_dset;
-    dims[1] = ny_dset;
-    dataspace_id = H5Screate_simple(2, dims, NULL);
-
-    Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.density, "/density");
-    Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_x, "/momentum_x");
-    Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_y, "/momentum_y");
-    Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_z, "/momentum_z");
-    Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.Energy, "/Energy");
-
-    #ifdef SCALAR
-    for (int s=0; s<NSCALARS; s++) {
-      // create the name of the dataset
-      char dataset[100];
-      char number[10];
-      strcpy(dataset, "/scalar");
-      sprintf(number, "%d", s);
-      strcat(dataset,number);
-
-      // TODO: If there is a test case for regression testing NSCALARS > 1 this substitution can be attempted.
-      // Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset);
-
-      // Copy the scalar array to the memory buffer
-      for (j=0; j<H.ny_real; j++) {
-        for (i=0; i<H.nx_real; i++) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx;
-          buf_id = j + i*H.ny_real;
-          dataset_buffer[buf_id] = C.scalar[id+s*H.n_cells];
-        }
-      }
-
-      status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset);
-    }
-    #endif  //SCALAR
-
-
-    #ifdef DE
-    Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.GasEnergy, "/GasEnergy");
-    #endif  //DE
-
-    // Free the dataspace id
-    status = H5Sclose(dataspace_id);
+  if (output_energy || H.Output_Complete_Data) {
+    Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_Energy, "/Energy");
+  #ifdef DE
+    Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_GasEnergy, "/GasEnergy");
+  #endif
   }
 
-  // 3D case
-  if (H.nx>1 && H.ny>1 && H.nz>1) {
-
-    int       nx_dset = H.nx_real;
-    int       ny_dset = H.ny_real;
-    int       nz_dset = H.nz_real;
-    hsize_t   dims[3];
-    hsize_t   dims_full[3];
+  #ifdef SCALAR
 
-    size_t buffer_size;
-    // Need a larger device buffer for MHD. In the future, if other fields need a larger device buffer, choose the maximum of the sizes.
-    // If the buffer is too large, it does not cause bugs (Oct 6 2022)
-    #ifdef MHD
-    buffer_size = (nx_dset+1)*(ny_dset+1)*(nz_dset+1);
-    #else
-    buffer_size = nx_dset*ny_dset*nz_dset;
-    #endif
-    // Using static DeviceVector here automatically allocates the buffer the first time it is needed
-    // It persists until program exit, and then calls Free upon destruction
-    cuda_utilities::DeviceVector<double> static device_dataset_vector{buffer_size};
-    double* device_dataset_buffer = device_dataset_vector.data();
-    dataset_buffer = (Real*) malloc(buffer_size*sizeof(Real));
-    //CudaSafeCall(cudaMalloc(&device_dataset_buffer,nx_dset*ny_dset*nz_dset*sizeof(double)));
-
-
-    // Create the data space for the datasets (note: WriteHDF5Field3D creates its own dataspace, does not use the shared one)
-    dims[0] = nx_dset;
-    dims[1] = ny_dset;
-    dims[2] = nz_dset;
-    dataspace_id = H5Screate_simple(3, dims, NULL);
-    WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_density, "/density");
-    if ( output_momentum || H.Output_Complete_Data ) {
-      WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_x, "/momentum_x");
-      WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_y, "/momentum_y");
-      WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_z, "/momentum_z");
-    }
+    #ifdef BASIC_SCALAR
+  Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_basic_scalar, "/scalar0");
+    #endif  // BASIC_SCALAR
 
-    if ( output_energy || H.Output_Complete_Data ){
-      WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_Energy, "/Energy");
-    }
+    #ifdef DUST
+  Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_dust_density,
+                            "/dust_density");
+    #endif  // DUST
 
-    #ifdef SCALAR
-    #if !defined(COOLING_GRACKLE) && !defined(CHEMISTRY_GPU) // Dont write scalars when using grackle
-    for (int s=0; s<NSCALARS; s++) {
-      // create the name of the dataset
-      char dataset[100];
-      char number[10];
-      strcpy(dataset, "/scalar");
-      sprintf(number, "%d", s);
-      strcat(dataset,number);
-      // Copy the scalar array to the memory buffer
-      for (k=0; k<H.nz_real; k++) {
-        for (j=0; j<H.ny_real; j++) {
-          for (i=0; i<H.nx_real; i++) {
-            id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-            buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-            dataset_buffer[buf_id] = C.scalar[id+s*H.n_cells];
-          }
-        }
-      }
-      status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset);
-    }
-    #else // COOLING_GRACKLE or CHEMISTRY_GPU. Write Chemistry when using GRACKLE
     #ifdef OUTPUT_CHEMISTRY
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        for (i=0; i<H.nx_real; i++) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-          buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-          #ifdef COOLING_GRACKLE
-          dataset_buffer[buf_id] = Cool.fields.HI_density[id];
-          #endif  //COOLING_GRACKLE
-          #ifdef CHEMISTRY_GPU
-          dataset_buffer[buf_id] = C.HI_density[id];
-          #endif  //CHEMISTRY_GPU
-        }
-      }
-    }
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HI_density");
-
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        for (i=0; i<H.nx_real; i++) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-          buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-          #ifdef COOLING_GRACKLE
-          dataset_buffer[buf_id] = Cool.fields.HII_density[id];
-          #endif  //COOLING_GRACKLE
-          #ifdef CHEMISTRY_GPU
-          dataset_buffer[buf_id] = C.HII_density[id];
-          #endif  //CHEMISTRY_GPU
-
-        }
-      }
-    }
-    if ( output_full_ionization || H.Output_Complete_Data ){
-      status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HII_density");
-    }
-
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        for (i=0; i<H.nx_real; i++) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-          buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-          #ifdef COOLING_GRACKLE
-          dataset_buffer[buf_id] = Cool.fields.HeI_density[id];
-          #endif  //COOLING_GRACKLE
-          #ifdef CHEMISTRY_GPU
-          dataset_buffer[buf_id] = C.HeI_density[id];
-          #endif  //CHEMISTRY_GPU
-        }
-      }
-    }
-    if ( output_full_ionization || H.Output_Complete_Data ){
-      status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HeI_density");
-    }
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        for (i=0; i<H.nx_real; i++) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-          buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-          #ifdef COOLING_GRACKLE
-          dataset_buffer[buf_id] = Cool.fields.HeII_density[id];
-          #endif  //COOLING_GRACKLE
-          #ifdef CHEMISTRY_GPU
-          dataset_buffer[buf_id] = C.HeII_density[id];
-          #endif  //CHEMISTRY_GPU
-        }
-      }
-    }
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HeII_density");
-
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        for (i=0; i<H.nx_real; i++) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-          buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-          #ifdef COOLING_GRACKLE
-          dataset_buffer[buf_id] = Cool.fields.HeIII_density[id];
-          #endif  //COOLING_GRACKLE
-          #ifdef CHEMISTRY_GPU
-          dataset_buffer[buf_id] = C.HeIII_density[id];
-          #endif  //CHEMISTRY_GPU
-        }
-      }
-    }
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/HeIII_density");
-
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        for (i=0; i<H.nx_real; i++) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-          buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-          #ifdef COOLING_GRACKLE
-          dataset_buffer[buf_id] = Cool.fields.e_density[id];
-          #endif  //COOLING_GRACKLE
-          #ifdef CHEMISTRY_GPU
-          dataset_buffer[buf_id] = C.e_density[id];
-          #endif  //CHEMISTRY_GPU
-        }
-      }
-    }
-    if ( output_electrons || H.Output_Complete_Data ){
-      status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/e_density");
-    }
-
+      #ifdef CHEMISTRY_GPU
+  Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.HI_density, "/HI_density");
+  Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.HII_density, "/HII_density");
+  Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.HeI_density, "/HeI_density");
+  Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.HeII_density, "/HeII_density");
+  Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.HeIII_density, "/HeIII_density");
+  Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, C.e_density, "/e_density");
+      #elif defined(COOLING_GRACKLE)
+  // Cool fields are CPU (host) only
+  Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.fields.HI_density, "/HI_density");
+  Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.fields.HII_density, "/HII_density");
+  Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.fields.HeI_density, "/HeI_density");
+  Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.fields.HeII_density, "/HeII_density");
+  Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.fields.HeIII_density, "/HeIII_density");
+  if (output_electrons || H.Output_Complete_Data) {
+    Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.fields.e_density, "/e_density");
+  }
+      #endif
+    #endif  // OUTPUT_CHEMISTRY
 
-    #ifdef GRACKLE_METALS
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        for (i=0; i<H.nx_real; i++) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-          buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-          dataset_buffer[buf_id] = Cool.fields.metal_density[id];
-        }
-      }
-    }
-    if ( output_metals || H.Output_Complete_Data ){
-      status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/metal_density");
-    }
-    #endif //GRACKLE_METALS
-
-    #endif //OUTPUT_CHEMISTRY
-
-    #ifdef OUTPUT_TEMPERATURE
-
-    #ifdef CHEMISTRY_GPU
-    Compute_Gas_Temperature( Chem.Fields.temperature_h, false );
-    #endif  //CHEMISTRY_GPU
-
-    // Copy the internal energy array to the memory buffer
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        for (i=0; i<H.nx_real; i++) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-          buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-          #ifdef COOLING_GRACKLE
-          dataset_buffer[buf_id] = Cool.temperature[id];
-          #endif
-          #ifdef CHEMISTRY_GPU
-          dataset_buffer[buf_id] = Chem.Fields.temperature_h[id];
-          #endif
-        }
-      }
-    }
+    #if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU)
 
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/temperature");
+      #ifdef GRACKLE_METALS
+  if (output_metals || H.Output_Complete_Data) {
+    Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.fields.metal_density, "/metal_density");
+  }
+      #endif  // GRACKLE_METALS
+
+      #ifdef OUTPUT_TEMPERATURE
+        #ifdef CHEMISTRY_GPU
+  Compute_Gas_Temperature(Chem.Fields.temperature_h, false);
+  Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Chem.Fields.temperature_h, "/temperature");
+        #elif defined(COOLING_GRACKLE)
+  Write_Grid_HDF5_Field_CPU(H, file_id, dataset_buffer, Cool.temperature, "/temperature");
+        #endif
+      #endif
 
-    #endif //OUTPUT_TEMPERATURE
+    #endif  // COOLING_GRACKLE || CHEMISTRY_GPU
 
-    #endif //COOLING_GRACKLE
-    #endif //SCALAR
+  #endif  // SCALAR
 
-    #ifdef DE
-    if ( output_energy || H.Output_Complete_Data ){
-    WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_GasEnergy, "/GasEnergy");
-    }
-    #endif  //DE
-
-    #if defined(GRAVITY) && defined(OUTPUT_POTENTIAL)
-    // Copy the potential array to the memory buffer
-    for (k=0; k<Grav.nz_local; k++) {
-      for (j=0; j<Grav.ny_local; j++) {
-        for (i=0; i<Grav.nx_local; i++) {
-          // id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-          // buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-          id = (i+N_GHOST_POTENTIAL) + (j+N_GHOST_POTENTIAL)*(Grav.nx_local+2*N_GHOST_POTENTIAL) + (k+N_GHOST_POTENTIAL)*(Grav.nx_local+2*N_GHOST_POTENTIAL)*(Grav.ny_local+2*N_GHOST_POTENTIAL);
-          buf_id = k + j*Grav.nz_local + i*Grav.nz_local*Grav.ny_local;
-          dataset_buffer[buf_id] = Grav.F.potential_h[id];
-        }
-      }
-    }
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, "/grav_potential");
-    #endif//GRAVITY and OUTPUT_POTENTIAL
+  // 3D case
+  if (H.nx > 1 && H.ny > 1 && H.nz > 1) {
+  #if defined(GRAVITY) && defined(OUTPUT_POTENTIAL)
+    Write_Generic_HDF5_Field_GPU(Grav.nx_local + 2 * N_GHOST_POTENTIAL, Grav.ny_local + 2 * N_GHOST_POTENTIAL,
+                                 Grav.nz_local + 2 * N_GHOST_POTENTIAL, Grav.nx_local, Grav.ny_local, Grav.nz_local,
+                                 N_GHOST_POTENTIAL, file_id, dataset_buffer, device_dataset_vector.data(),
+                                 Grav.F.potential_d, "/grav_potential");
+  #endif  // GRAVITY and OUTPUT_POTENTIAL
 
-    #ifdef  MHD
+  #ifdef MHD
     if (H.Output_Complete_Data) {
-      // Note: for WriteHDF5Field3D, use the left side n_ghost
-      WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, C.d_magnetic_x, "/magnetic_x");
-      WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, C.d_magnetic_y, "/magnetic_y");
-      WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, C.d_magnetic_z, "/magnetic_z");
-    }
-    #endif  //MHD
-
-    // Free the dataspace id
-    status = H5Sclose(dataspace_id);
-    //CudaSafeCall(cudaFree(device_dataset_buffer));// No longer needed because devicevector frees when it should
+      Write_HDF5_Field_3D(H.nx, H.ny, H.nx_real + 1, H.ny_real, H.nz_real, H.n_ghost, file_id, dataset_buffer,
+                          device_dataset_vector.data(), C.d_magnetic_x, "/magnetic_x", 0);
+      Write_HDF5_Field_3D(H.nx, H.ny, H.nx_real, H.ny_real + 1, H.nz_real, H.n_ghost, file_id, dataset_buffer,
+                          device_dataset_vector.data(), C.d_magnetic_y, "/magnetic_y", 1);
+      Write_HDF5_Field_3D(H.nx, H.ny, H.nx_real, H.ny_real, H.nz_real + 1, H.n_ghost, file_id, dataset_buffer,
+                          device_dataset_vector.data(), C.d_magnetic_z, "/magnetic_z", 2);
+    }
+  #endif  // MHD
   }
+
   free(dataset_buffer);
 }
-#endif //HDF5
-
+#endif  // HDF5
 
 #ifdef HDF5
 /*! \fn void Write_Projection_HDF5(hid_t file_id)
- *  \brief Write projected density and temperature data to a file, at the current simulation time. */
+ *  \brief Write projected density and temperature data to a file, at the
+ * current simulation time. */
 void Grid3D::Write_Projection_HDF5(hid_t file_id)
 {
-  int i, j, k, id, buf_id;
-  hid_t     dataset_id, dataspace_xy_id, dataspace_xz_id;
-  Real      *dataset_buffer_dxy, *dataset_buffer_dxz;
-  Real      *dataset_buffer_Txy, *dataset_buffer_Txz;
-  herr_t    status;
-  Real dxy, dxz, Txy, Txz, n, T;
-
+  hid_t dataset_id, dataspace_xy_id, dataspace_xz_id;
+  Real *dataset_buffer_dxy, *dataset_buffer_dxz;
+  Real *dataset_buffer_Txy, *dataset_buffer_Txz;
+  herr_t status;
+  Real dxy, dxz, Txy, Txz;
+  #ifdef DUST
+  Real dust_xy, dust_xz;
+  Real *dataset_buffer_dust_xy, *dataset_buffer_dust_xz;
+  #endif
 
-  n = T = 0;
   Real mu = 0.6;
 
   // 3D
-  if (H.nx>1 && H.ny>1 && H.nz>1) {
-
-    int       nx_dset = H.nx_real;
-    int       ny_dset = H.ny_real;
-    int       nz_dset = H.nz_real;
-    hsize_t   dims[2];
-    dataset_buffer_dxy = (Real *) malloc(H.nx_real*H.ny_real*sizeof(Real));
-    dataset_buffer_dxz = (Real *) malloc(H.nx_real*H.nz_real*sizeof(Real));
-    dataset_buffer_Txy = (Real *) malloc(H.nx_real*H.ny_real*sizeof(Real));
-    dataset_buffer_Txz = (Real *) malloc(H.nx_real*H.nz_real*sizeof(Real));
+  if (H.nx > 1 && H.ny > 1 && H.nz > 1) {
+    int nx_dset = H.nx_real;
+    int ny_dset = H.ny_real;
+    int nz_dset = H.nz_real;
+    hsize_t dims[2];
+    dataset_buffer_dxy = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real));
+    dataset_buffer_dxz = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real));
+    dataset_buffer_Txy = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real));
+    dataset_buffer_Txz = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real));
+  #ifdef DUST
+    dataset_buffer_dust_xy = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real));
+    dataset_buffer_dust_xz = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real));
+  #endif
 
     // Create the data space for the datasets
-    dims[0] = nx_dset;
-    dims[1] = ny_dset;
+    dims[0]         = nx_dset;
+    dims[1]         = ny_dset;
     dataspace_xy_id = H5Screate_simple(2, dims, NULL);
-    dims[1] = nz_dset;
+    dims[1]         = nz_dset;
     dataspace_xz_id = H5Screate_simple(2, dims, NULL);
 
     // Copy the xy density and temperature projections to the memory buffer
-    for (j=0; j<H.ny_real; j++) {
-      for (i=0; i<H.nx_real; i++) {
+    for (int j = 0; j < H.ny_real; j++) {
+      for (int i = 0; i < H.nx_real; i++) {
         dxy = 0;
         Txy = 0;
+  #ifdef DUST
+        dust_xy = 0;
+  #endif
         // for each xy element, sum over the z column
-        for (k=0; k<H.nz_real; k++) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
+        for (int k = 0; k < H.nz_real; k++) {
+          int const xid = i + H.n_ghost;
+          int const yid = j + H.n_ghost;
+          int const zid = k + H.n_ghost;
+          int const id  = cuda_utilities::compute1DIndex(xid, yid, zid, H.nx, H.ny);
+
           // sum density
-          dxy += C.density[id]*H.dz;
+          Real const d = C.density[id];
+          dxy += d * H.dz;
+  #ifdef DUST
+          dust_xy += C.dust_density[id] * H.dz;
+  #endif
           // calculate number density
-          n = C.density[id]*DENSITY_UNIT/(mu*MP);
-          // calculate temperature
-          #ifndef DE
-          Real mx = C.momentum_x[id];
-          Real my = C.momentum_y[id];
-          Real mz = C.momentum_z[id];
-          Real E = C.Energy[id];
-          T = (E - 0.5*(mx*mx + my*my + mz*mz)/C.density[id])*(gama-1.0)*PRESSURE_UNIT / (n*KB);
-          #endif
-          #ifdef DE
-          T = C.GasEnergy[id]*PRESSURE_UNIT*(gama-1.0) / (n*KB);
-          #endif
-          Txy += T*C.density[id]*H.dz;
+          Real const n = d * DENSITY_UNIT / (mu * MP);
+
+  // calculate temperature
+  #ifdef DE
+          Real const T = hydro_utilities::Calc_Temp_DE(C.GasEnergy[id], gama, n);
+  #else  // DE is not defined
+          Real const mx = C.momentum_x[id];
+          Real const my = C.momentum_y[id];
+          Real const mz = C.momentum_z[id];
+          Real const E  = C.Energy[id];
+
+    #ifdef MHD
+          auto const [magnetic_x, magnetic_y, magnetic_z] =
+              mhd::utils::cellCenteredMagneticFields(C.host, id, xid, yid, zid, H.n_cells, H.nx, H.ny);
+          Real const T =
+              hydro_utilities::Calc_Temp_Conserved(E, d, mx, my, mz, gama, n, magnetic_x, magnetic_y, magnetic_z);
+    #else   // MHD is not defined
+          Real const T = hydro_utilities::Calc_Temp_Conserved(E, d, mx, my, mz, gama, n);
+    #endif  // MHD
+  #endif    // DE
+
+          Txy += T * d * H.dz;
         }
-        buf_id = j + i*H.ny_real;
+        int const buf_id           = j + i * H.ny_real;
         dataset_buffer_dxy[buf_id] = dxy;
         dataset_buffer_Txy[buf_id] = Txy;
+  #ifdef DUST
+        dataset_buffer_dust_xy[buf_id] = dust_xy;
+  #endif
       }
     }
 
     // Copy the xz density and temperature projections to the memory buffer
-    for (k=0; k<H.nz_real; k++) {
-      for (i=0; i<H.nx_real; i++) {
+    for (int k = 0; k < H.nz_real; k++) {
+      for (int i = 0; i < H.nx_real; i++) {
         dxz = 0;
         Txz = 0;
+  #ifdef DUST
+        dust_xz = 0;
+  #endif
         // for each xz element, sum over the y column
-        for (j=0; j<H.ny_real; j++) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
+        for (int j = 0; j < H.ny_real; j++) {
+          int const xid = i + H.n_ghost;
+          int const yid = j + H.n_ghost;
+          int const zid = k + H.n_ghost;
+          int const id  = cuda_utilities::compute1DIndex(xid, yid, zid, H.nx, H.ny);
           // sum density
-          dxz += C.density[id]*H.dy;
+          Real const d = C.density[id];
+          dxz += d * H.dy;
+  #ifdef DUST
+          dust_xz += C.dust_density[id] * H.dy;
+  #endif
           // calculate number density
-          n = C.density[id]*DENSITY_UNIT/(mu*MP);
-          // calculate temperature
-          #ifndef DE
-          Real mx = C.momentum_x[id];
-          Real my = C.momentum_y[id];
-          Real mz = C.momentum_z[id];
-          Real E = C.Energy[id];
-          T = (E - 0.5*(mx*mx + my*my + mz*mz)/C.density[id])*(gama-1.0)*PRESSURE_UNIT / (n*KB);
-          #endif
-          #ifdef DE
-          T = C.GasEnergy[id]*PRESSURE_UNIT*(gama-1.0) / (n*KB);
-          #endif
-          Txz += T*C.density[id]*H.dy;
+          Real const n = d * DENSITY_UNIT / (mu * MP);
+  #ifdef DE
+          Real const T = hydro_utilities::Calc_Temp_DE(C.GasEnergy[id], gama, n);
+  #else  // DE is not defined
+          Real const mx = C.momentum_x[id];
+          Real const my = C.momentum_y[id];
+          Real const mz = C.momentum_z[id];
+          Real const E  = C.Energy[id];
+
+    #ifdef MHD
+          auto const [magnetic_x, magnetic_y, magnetic_z] =
+              mhd::utils::cellCenteredMagneticFields(C.host, id, xid, yid, zid, H.n_cells, H.nx, H.ny);
+          Real const T =
+              hydro_utilities::Calc_Temp_Conserved(E, d, mx, my, mz, gama, n, magnetic_x, magnetic_y, magnetic_z);
+    #else   // MHD is not defined
+          Real const T = hydro_utilities::Calc_Temp_Conserved(E, d, mx, my, mz, gama, n);
+    #endif  // MHD
+  #endif    // DE
+          Txz += T * d * H.dy;
         }
-        buf_id = k + i*H.nz_real;
+        int const buf_id           = k + i * H.nz_real;
         dataset_buffer_dxz[buf_id] = dxz;
         dataset_buffer_Txz[buf_id] = Txz;
+  #ifdef DUST
+        dataset_buffer_dust_xz[buf_id] = dust_xz;
+  #endif
       }
     }
 
     // Write the projected density and temperature arrays to file
-    status = HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_dxy, "/d_xy");
-    status = HDF5_Dataset(file_id, dataspace_xz_id, dataset_buffer_dxz, "/d_xz");
-    status = HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_Txy, "/T_xy");
-    status = HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_Txz, "/T_xz");
+    status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_dxy, "/d_xy");
+    status = Write_HDF5_Dataset(file_id, dataspace_xz_id, dataset_buffer_dxz, "/d_xz");
+    status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_Txy, "/T_xy");
+    status = Write_HDF5_Dataset(file_id, dataspace_xz_id, dataset_buffer_Txz, "/T_xz");
+  #ifdef DUST
+    status = Write_HDF5_Dataset(file_id, dataspace_xy_id, dataset_buffer_dust_xy, "/d_dust_xy");
+    status = Write_HDF5_Dataset(file_id, dataspace_xz_id, dataset_buffer_dust_xz, "/d_dust_xz");
+  #endif
 
     // Free the dataspace ids
     status = H5Sclose(dataspace_xz_id);
     status = H5Sclose(dataspace_xy_id);
+  } else {
+    printf("Projection write only works for 3D data.\n");
   }
-  else printf("Projection write only works for 3D data.\n");
 
   free(dataset_buffer_dxy);
   free(dataset_buffer_dxz);
   free(dataset_buffer_Txy);
   free(dataset_buffer_Txz);
-
+  #ifdef DUST
+  free(dataset_buffer_dust_xy);
+  free(dataset_buffer_dust_xz);
+  #endif  // DUST
 }
-#endif //HDF5
-
+#endif  // HDF5
 
 #ifdef HDF5
 /*! \fn void Write_Rotated_Projection_HDF5(hid_t file_id)
- *  \brief Write rotated projected data to a file, at the current simulation time. */
+ *  \brief Write rotated projected data to a file, at the current simulation
+ * time. */
 void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id)
 {
-  int i, j, k, id, buf_id;
-  hid_t     dataset_id, dataspace_xzr_id;
-  Real      *dataset_buffer_dxzr;
-  Real      *dataset_buffer_Txzr;
-  Real      *dataset_buffer_vxxzr;
-  Real      *dataset_buffer_vyxzr;
-  Real      *dataset_buffer_vzxzr;
-
-  herr_t    status;
+  hid_t dataset_id, dataspace_xzr_id;
+  Real *dataset_buffer_dxzr;
+  Real *dataset_buffer_Txzr;
+  Real *dataset_buffer_vxxzr;
+  Real *dataset_buffer_vyxzr;
+  Real *dataset_buffer_vzxzr;
+
+  herr_t status;
   Real dxy, dxz, Txy, Txz;
-  Real d, n, T, vx, vy, vz;
+  Real d, vx, vy, vz;
 
-  Real x, y, z;     //cell positions
-  Real xp, yp, zp;  //rotated positions
-  Real alpha, beta; //projected positions
-  int  ix, iz;      //projected index positions
+  Real x, y, z;      // cell positions
+  Real xp, yp, zp;   // rotated positions
+  Real alpha, beta;  // projected positions
+  int ix, iz;        // projected index positions
 
-  n = T = 0;
   Real mu = 0.6;
 
-  srand(137);     //initialize a random number
-  Real eps = 0.1; //randomize cell centers slightly to combat aliasing
+  srand(137);      // initialize a random number
+  Real eps = 0.1;  // randomize cell centers slightly to combat aliasing
 
   // 3D
-  if (H.nx>1 && H.ny>1 && H.nz>1) {
-
-    Real      Lx = R.Lx; //projected box size in x dir
-    Real      Lz = R.Lz; //projected box size in z dir
+  if (H.nx > 1 && H.ny > 1 && H.nz > 1) {
+    Real Lx     = R.Lx;  // projected box size in x dir
+    Real Lz     = R.Lz;  // projected box size in z dir
     int nx_dset = R.nx;
     int nz_dset = R.nz;
 
     if (R.nx * R.nz == 0) {
-      chprintf("WARNING: compiled with -DROTATED_PROJECTION but input parameters nxr or nzr = 0\n");
+      chprintf(
+          "WARNING: compiled with -DROTATED_PROJECTION but input parameters "
+          "nxr or nzr = 0\n");
       return;
     }
 
@@ -1829,117 +1712,121 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id)
     // this piece of the simulation volume
     // min and max values were set in the header write
     int nx_min, nx_max, nz_min, nz_max;
-    nx_min = R.nx_min;
-    nx_max = R.nx_max;
-    nz_min = R.nz_min;
-    nz_max = R.nz_max;
-    nx_dset = nx_max-nx_min;
-    nz_dset = nz_max-nz_min;
+    nx_min  = R.nx_min;
+    nx_max  = R.nx_max;
+    nz_min  = R.nz_min;
+    nz_max  = R.nz_max;
+    nx_dset = nx_max - nx_min;
+    nz_dset = nz_max - nz_min;
 
-    hsize_t   dims[2];
+    hsize_t dims[2];
 
     // allocate the buffers for the projected dataset
     // and initialize to zero
-    dataset_buffer_dxzr  = (Real *) calloc(nx_dset*nz_dset,sizeof(Real));
-    dataset_buffer_Txzr  = (Real *) calloc(nx_dset*nz_dset,sizeof(Real));
-    dataset_buffer_vxxzr = (Real *) calloc(nx_dset*nz_dset,sizeof(Real));
-    dataset_buffer_vyxzr = (Real *) calloc(nx_dset*nz_dset,sizeof(Real));
-    dataset_buffer_vzxzr = (Real *) calloc(nx_dset*nz_dset,sizeof(Real));
+    dataset_buffer_dxzr  = (Real *)calloc(nx_dset * nz_dset, sizeof(Real));
+    dataset_buffer_Txzr  = (Real *)calloc(nx_dset * nz_dset, sizeof(Real));
+    dataset_buffer_vxxzr = (Real *)calloc(nx_dset * nz_dset, sizeof(Real));
+    dataset_buffer_vyxzr = (Real *)calloc(nx_dset * nz_dset, sizeof(Real));
+    dataset_buffer_vzxzr = (Real *)calloc(nx_dset * nz_dset, sizeof(Real));
 
     // Create the data space for the datasets
-    dims[0] = nx_dset;
-    dims[1] = nz_dset;
+    dims[0]          = nx_dset;
+    dims[1]          = nz_dset;
     dataspace_xzr_id = H5Screate_simple(2, dims, NULL);
 
     // Copy the xz rotated projection to the memory buffer
-    for (k=0; k<H.nz_real; k++) {
-      for (i=0; i<H.nx_real; i++) {
-        for (j=0; j<H.ny_real; j++) {
-
-          //get cell index
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-
-          //get cell positions
-          Get_Position(i+H.n_ghost, j+H.n_ghost, k+H.n_ghost, &x, &y, &z);
-
-          //add very slight noise to locations
-          x += eps*H.dx * (drand48() - 0.5);
-          y += eps*H.dy * (drand48() - 0.5);
-          z += eps*H.dz * (drand48() - 0.5);
-
-          //rotate cell positions
-          rotate_point(x, y, z, R.delta, R.phi, R.theta, &xp, &yp, &zp);
-
-          //find projected locations
-          //assumes box centered at [0,0,0]
-          alpha = (R.nx*(xp+0.5*R.Lx)/R.Lx);
-          beta  = (R.nz*(zp+0.5*R.Lz)/R.Lz);
-          ix = (int) round(alpha);
-          iz = (int) round(beta);
-          #ifdef MPI_CHOLLA
+    for (int k = 0; k < H.nz_real; k++) {
+      for (int i = 0; i < H.nx_real; i++) {
+        for (int j = 0; j < H.ny_real; j++) {
+          // get cell index
+          int const xid = i + H.n_ghost;
+          int const yid = j + H.n_ghost;
+          int const zid = k + H.n_ghost;
+          int const id  = cuda_utilities::compute1DIndex(xid, yid, zid, H.nx, H.ny);
+
+          // get cell positions
+          Get_Position(i + H.n_ghost, j + H.n_ghost, k + H.n_ghost, &x, &y, &z);
+
+          // add very slight noise to locations
+          x += eps * H.dx * (drand48() - 0.5);
+          y += eps * H.dy * (drand48() - 0.5);
+          z += eps * H.dz * (drand48() - 0.5);
+
+          // rotate cell positions
+          Rotate_Point(x, y, z, R.delta, R.phi, R.theta, &xp, &yp, &zp);
+
+          // find projected locations
+          // assumes box centered at [0,0,0]
+          alpha = (R.nx * (xp + 0.5 * R.Lx) / R.Lx);
+          beta  = (R.nz * (zp + 0.5 * R.Lz) / R.Lz);
+          ix    = (int)round(alpha);
+          iz    = (int)round(beta);
+  #ifdef MPI_CHOLLA
           ix = ix - nx_min;
           iz = iz - nz_min;
-          #endif
+  #endif
 
-          if((ix>=0)&&(ix<nx_dset)&&(iz>=0)&&(iz<nz_dset))
-          {
-            buf_id = iz + ix*nz_dset;
-            d = C.density[id];
+          if ((ix >= 0) && (ix < nx_dset) && (iz >= 0) && (iz < nz_dset)) {
+            int const buf_id = iz + ix * nz_dset;
+            d                = C.density[id];
             // project density
-            dataset_buffer_dxzr[buf_id] += d*H.dy;
+            dataset_buffer_dxzr[buf_id] += d * H.dy;
             // calculate number density
-            n = d*DENSITY_UNIT/(mu*MP);
-            // calculate temperature
-            #ifndef DE
-            Real mx = C.momentum_x[id];
-            Real my = C.momentum_y[id];
-            Real mz = C.momentum_z[id];
-            Real E = C.Energy[id];
-            T = (E - 0.5*(mx*mx + my*my + mz*mz)/C.density[id])*(gama-1.0)*PRESSURE_UNIT / (n*KB);
-            #endif
-            #ifdef DE
-            T = C.GasEnergy[id]*PRESSURE_UNIT*(gama-1.0) / (n*KB);
-            #endif
-            Txz = T*d*H.dy;
+            Real const n = d * DENSITY_UNIT / (mu * MP);
+
+  // calculate temperature
+  #ifdef DE
+            Real const T = hydro_utilities::Calc_Temp_DE(C.GasEnergy[id], gama, n);
+  #else  // DE is not defined
+            Real const mx = C.momentum_x[id];
+            Real const my = C.momentum_y[id];
+            Real const mz = C.momentum_z[id];
+            Real const E  = C.Energy[id];
+
+    #ifdef MHD
+            auto const [magnetic_x, magnetic_y, magnetic_z] =
+                mhd::utils::cellCenteredMagneticFields(C.host, id, xid, yid, zid, H.n_cells, H.nx, H.ny);
+            Real const T =
+                hydro_utilities::Calc_Temp_Conserved(E, d, mx, my, mz, gama, n, magnetic_x, magnetic_y, magnetic_z);
+    #else   // MHD is not defined
+            Real const T = hydro_utilities::Calc_Temp_Conserved(E, d, mx, my, mz, gama, n);
+    #endif  // MHD
+  #endif    // DE
+
+            Txz = T * d * H.dy;
             dataset_buffer_Txzr[buf_id] += Txz;
 
-            //compute velocities
-            vx = C.momentum_x[id];
-            dataset_buffer_vxxzr[buf_id] += vx*H.dy;
-            vy = C.momentum_y[id];
-            dataset_buffer_vyxzr[buf_id] += vy*H.dy;
-            vz = C.momentum_z[id];
-            dataset_buffer_vzxzr[buf_id] += vz*H.dy;
+            // compute velocities
+            dataset_buffer_vxxzr[buf_id] += C.momentum_x[id] * H.dy;
+            dataset_buffer_vyxzr[buf_id] += C.momentum_y[id] * H.dy;
+            dataset_buffer_vzxzr[buf_id] += C.momentum_z[id] * H.dy;
           }
         }
       }
     }
 
     // Write projected d,T,vx,vy,vz
-    status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_dxzr, "/d_xzr");
-    status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_Txzr, "/T_xzr");
-    status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vxxzr, "/vx_xzr");
-    status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vyxzr, "/vy_xzr");
-    status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vzxzr, "/vz_xzr");
+    status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_dxzr, "/d_xzr");
+    status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_Txzr, "/T_xzr");
+    status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vxxzr, "/vx_xzr");
+    status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vyxzr, "/vy_xzr");
+    status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vzxzr, "/vz_xzr");
 
     // Free the dataspace id
     status = H5Sclose(dataspace_xzr_id);
 
-    //free the data
+    // free the data
     free(dataset_buffer_dxzr);
     free(dataset_buffer_Txzr);
     free(dataset_buffer_vxxzr);
     free(dataset_buffer_vyxzr);
     free(dataset_buffer_vzxzr);
 
+  } else {
+    chprintf("Rotated projection write only implemented for 3D data.\n");
   }
-  else chprintf("Rotated projection write only implemented for 3D data.\n");
-
-
-
 }
-#endif //HDF5
-
+#endif  // HDF5
 
 #ifdef HDF5
 /*! \fn void Write_Slices_HDF5(hid_t file_id)
@@ -1948,81 +1835,103 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id)
 void Grid3D::Write_Slices_HDF5(hid_t file_id)
 {
   int i, j, k, id, buf_id;
-  hid_t     dataset_id, dataspace_id;
-  Real      *dataset_buffer_d;
-  Real      *dataset_buffer_mx;
-  Real      *dataset_buffer_my;
-  Real      *dataset_buffer_mz;
-  Real      *dataset_buffer_E;
+  hid_t dataset_id, dataspace_id;
+  Real *dataset_buffer_d;
+  Real *dataset_buffer_mx;
+  Real *dataset_buffer_my;
+  Real *dataset_buffer_mz;
+  Real *dataset_buffer_E;
   #ifdef DE
-  Real      *dataset_buffer_GE;
+  Real *dataset_buffer_GE;
   #endif
   #ifdef SCALAR
-  Real      *dataset_buffer_scalar;
+  Real *dataset_buffer_scalar;
   #endif
-  herr_t    status;
+  herr_t status;
   int xslice, yslice, zslice;
-  xslice = H.nx/2;
-  yslice = H.ny/2;
-  zslice = H.nz/2;
+  xslice = H.nx / 2;
+  yslice = H.ny / 2;
+  zslice = H.nz / 2;
   #ifdef MPI_CHOLLA
-  xslice = nx_global/2;
-  yslice = ny_global/2;
-  zslice = nz_global/2;
+  xslice = nx_global / 2;
+  yslice = ny_global / 2;
+  zslice = nz_global / 2;
   #endif
 
-
   // 3D
-  if (H.nx>1 && H.ny>1 && H.nz>1) {
-
-    int       nx_dset = H.nx_real;
-    int       ny_dset = H.ny_real;
-    int       nz_dset = H.nz_real;
-    hsize_t   dims[2];
-
+  if (H.nx > 1 && H.ny > 1 && H.nz > 1) {
+    int nx_dset = H.nx_real;
+    int ny_dset = H.ny_real;
+    int nz_dset = H.nz_real;
+    hsize_t dims[2];
 
     // Create the xy data space for the datasets
-    dims[0] = nx_dset;
-    dims[1] = ny_dset;
+    dims[0]      = nx_dset;
+    dims[1]      = ny_dset;
     dataspace_id = H5Screate_simple(2, dims, NULL);
 
     // Allocate memory for the xy slices
-    dataset_buffer_d  = (Real *) malloc(H.nx_real*H.ny_real*sizeof(Real));
-    dataset_buffer_mx = (Real *) malloc(H.nx_real*H.ny_real*sizeof(Real));
-    dataset_buffer_my = (Real *) malloc(H.nx_real*H.ny_real*sizeof(Real));
-    dataset_buffer_mz = (Real *) malloc(H.nx_real*H.ny_real*sizeof(Real));
-    dataset_buffer_E  = (Real *) malloc(H.nx_real*H.ny_real*sizeof(Real));
-    #ifdef DE
-    dataset_buffer_GE = (Real *) malloc(H.nx_real*H.ny_real*sizeof(Real));
-    #endif
-    #ifdef SCALAR
-    dataset_buffer_scalar = (Real *) malloc(NSCALARS*H.nx_real*H.ny_real*sizeof(Real));
-    #endif
+    dataset_buffer_d  = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real));
+    dataset_buffer_mx = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real));
+    dataset_buffer_my = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real));
+    dataset_buffer_mz = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real));
+    dataset_buffer_E  = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real));
+  #ifdef MHD
+    std::vector<Real> dataset_buffer_magnetic_x(H.nx_real * H.ny_real);
+    std::vector<Real> dataset_buffer_magnetic_y(H.nx_real * H.ny_real);
+    std::vector<Real> dataset_buffer_magnetic_z(H.nx_real * H.ny_real);
+  #endif  // MHD
+  #ifdef DE
+    dataset_buffer_GE = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real));
+  #endif
+  #ifdef SCALAR
+    dataset_buffer_scalar = (Real *)malloc(NSCALARS * H.nx_real * H.ny_real * sizeof(Real));
+  #endif
 
     // Copy the xy slices to the memory buffers
-    for (j=0; j<H.ny_real; j++) {
-      for (i=0; i<H.nx_real; i++) {
-        id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + zslice*H.nx*H.ny;
-        buf_id = j + i*H.ny_real;
-        #ifdef MPI_CHOLLA
-        // When there are multiple processes, check whether this slice is in your domain
-        if (zslice >= nz_local_start && zslice < nz_local_start+nz_local) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (zslice-nz_local_start+H.n_ghost)*H.nx*H.ny;
-        #endif //MPI_CHOLLA
+    for (j = 0; j < H.ny_real; j++) {
+      for (i = 0; i < H.nx_real; i++) {
+        id     = cuda_utilities::compute1DIndex(i + H.n_ghost, j + H.n_ghost, zslice, H.nx, H.ny);
+        buf_id = j + i * H.ny_real;
+  #ifdef MHD
+        int id_xm1 = cuda_utilities::compute1DIndex(i + H.n_ghost - 1, j + H.n_ghost, zslice, H.nx, H.ny);
+        int id_ym1 = cuda_utilities::compute1DIndex(i + H.n_ghost, j + H.n_ghost - 1, zslice, H.nx, H.ny);
+        int id_zm1 = cuda_utilities::compute1DIndex(i + H.n_ghost, j + H.n_ghost, zslice - 1, H.nx, H.ny);
+  #endif  // MHD
+  #ifdef MPI_CHOLLA
+        // When there are multiple processes, check whether this slice is in
+        // your domain
+        if (zslice >= nz_local_start && zslice < nz_local_start + nz_local) {
+          id = cuda_utilities::compute1DIndex(i + H.n_ghost, j + H.n_ghost, zslice - nz_local_start + H.n_ghost, H.nx,
+                                              H.ny);
+    #ifdef MHD
+          int id_xm1 = cuda_utilities::compute1DIndex(i + H.n_ghost - 1, j + H.n_ghost,
+                                                      zslice - nz_local_start + H.n_ghost, H.nx, H.ny);
+          int id_ym1 = cuda_utilities::compute1DIndex(i + H.n_ghost, j + H.n_ghost - 1,
+                                                      zslice - nz_local_start + H.n_ghost, H.nx, H.ny);
+          int id_zm1 = cuda_utilities::compute1DIndex(i + H.n_ghost, j + H.n_ghost,
+                                                      zslice - nz_local_start + H.n_ghost - 1, H.nx, H.ny);
+    #endif  // MHD
+  #endif    // MPI_CHOLLA
           dataset_buffer_d[buf_id]  = C.density[id];
           dataset_buffer_mx[buf_id] = C.momentum_x[id];
           dataset_buffer_my[buf_id] = C.momentum_y[id];
           dataset_buffer_mz[buf_id] = C.momentum_z[id];
           dataset_buffer_E[buf_id]  = C.Energy[id];
-          #ifdef DE
+  #ifdef MHD
+          dataset_buffer_magnetic_x[buf_id] = 0.5 * (C.magnetic_x[id] + C.magnetic_x[id_xm1]);
+          dataset_buffer_magnetic_y[buf_id] = 0.5 * (C.magnetic_y[id] + C.magnetic_y[id_ym1]);
+          dataset_buffer_magnetic_z[buf_id] = 0.5 * (C.magnetic_z[id] + C.magnetic_z[id_zm1]);
+  #endif  // MHD
+  #ifdef DE
           dataset_buffer_GE[buf_id] = C.GasEnergy[id];
-          #endif
-          #ifdef SCALAR
-          for (int ii=0; ii<NSCALARS; ii++) {
-            dataset_buffer_scalar[buf_id+ii*H.nx*H.ny] = C.scalar[id+ii*H.n_cells];
+  #endif
+  #ifdef SCALAR
+          for (int ii = 0; ii < NSCALARS; ii++) {
+            dataset_buffer_scalar[buf_id + ii * H.nx * H.ny] = C.scalar[id + ii * H.n_cells];
           }
-          #endif
-        #ifdef MPI_CHOLLA
+  #endif
+  #ifdef MPI_CHOLLA
         }
         // if the slice isn't in your domain, just write out zeros
         else {
@@ -2031,31 +1940,41 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
           dataset_buffer_my[buf_id] = 0;
           dataset_buffer_mz[buf_id] = 0;
           dataset_buffer_E[buf_id]  = 0;
-          #ifdef DE
+    #ifdef MHD
+          dataset_buffer_magnetic_x[buf_id] = 0;
+          dataset_buffer_magnetic_y[buf_id] = 0;
+          dataset_buffer_magnetic_z[buf_id] = 0;
+    #endif  // MHD
+    #ifdef DE
           dataset_buffer_GE[buf_id] = 0;
-          #endif
-          #ifdef SCALAR
-          for (int ii=0; ii<NSCALARS; ii++) {
-            dataset_buffer_scalar[buf_id+ii*H.nx*H.ny] = 0;
+    #endif
+    #ifdef SCALAR
+          for (int ii = 0; ii < NSCALARS; ii++) {
+            dataset_buffer_scalar[buf_id + ii * H.nx * H.ny] = 0;
           }
-          #endif
+    #endif
         }
-        #endif // MPI_CHOLLA
+  #endif  // MPI_CHOLLA
       }
     }
 
     // Write out the xy datasets for each variable
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_xy");
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_xy");
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_xy");
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_xy");
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_xy");
-    #ifdef DE
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_xy");
-    #endif
-    #ifdef SCALAR
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, "/scalar_xy");
-    #endif
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_xy");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_xy");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_xy");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_xy");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_xy");
+  #ifdef MHD
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_magnetic_x.data(), "/magnetic_x_xy");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_magnetic_y.data(), "/magnetic_y_xy");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_magnetic_z.data(), "/magnetic_z_xy");
+  #endif  // MHD
+  #ifdef DE
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_xy");
+  #endif
+  #ifdef SCALAR
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, "/scalar_xy");
+  #endif
     // Free the dataspace id
     status = H5Sclose(dataspace_id);
 
@@ -2065,57 +1984,80 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
     free(dataset_buffer_my);
     free(dataset_buffer_mz);
     free(dataset_buffer_E);
-    #ifdef DE
+  #ifdef DE
     free(dataset_buffer_GE);
-    #endif
-    #ifdef SCALAR
+  #endif
+  #ifdef SCALAR
     free(dataset_buffer_scalar);
-    #endif
-
+  #endif
 
     // Create the xz data space for the datasets
-    dims[0] = nx_dset;
-    dims[1] = nz_dset;
+    dims[0]      = nx_dset;
+    dims[1]      = nz_dset;
     dataspace_id = H5Screate_simple(2, dims, NULL);
 
     // allocate the memory for the xz slices
-    dataset_buffer_d  = (Real *) malloc(H.nx_real*H.nz_real*sizeof(Real));
-    dataset_buffer_mx = (Real *) malloc(H.nx_real*H.nz_real*sizeof(Real));
-    dataset_buffer_my = (Real *) malloc(H.nx_real*H.nz_real*sizeof(Real));
-    dataset_buffer_mz = (Real *) malloc(H.nx_real*H.nz_real*sizeof(Real));
-    dataset_buffer_E  = (Real *) malloc(H.nx_real*H.nz_real*sizeof(Real));
-    #ifdef DE
-    dataset_buffer_GE = (Real *) malloc(H.nx_real*H.nz_real*sizeof(Real));
-    #endif
-    #ifdef SCALAR
-    dataset_buffer_scalar = (Real *) malloc(NSCALARS*H.nx_real*H.nz_real*sizeof(Real));
-    #endif
-
+    dataset_buffer_d  = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real));
+    dataset_buffer_mx = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real));
+    dataset_buffer_my = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real));
+    dataset_buffer_mz = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real));
+    dataset_buffer_E  = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real));
+  #ifdef MHD
+    dataset_buffer_magnetic_x.resize(H.nx_real * H.nz_real);
+    dataset_buffer_magnetic_y.resize(H.nx_real * H.nz_real);
+    dataset_buffer_magnetic_z.resize(H.nx_real * H.nz_real);
+  #endif  // MHD
+  #ifdef DE
+    dataset_buffer_GE = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real));
+  #endif
+  #ifdef SCALAR
+    dataset_buffer_scalar = (Real *)malloc(NSCALARS * H.nx_real * H.nz_real * sizeof(Real));
+  #endif
 
     // Copy the xz slices to the memory buffers
-    for (k=0; k<H.nz_real; k++) {
-      for (i=0; i<H.nx_real; i++) {
-        id = (i+H.n_ghost) + yslice*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-        buf_id = k + i*H.nz_real;
-        #ifdef MPI_CHOLLA
-        // When there are multiple processes, check whether this slice is in your domain
-        if (yslice >= ny_local_start && yslice < ny_local_start+ny_local) {
-          id = (i+H.n_ghost) + (yslice-ny_local_start+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-        #endif //MPI_CHOLLA
-        dataset_buffer_d[buf_id]  = C.density[id];
-        dataset_buffer_mx[buf_id] = C.momentum_x[id];
-        dataset_buffer_my[buf_id] = C.momentum_y[id];
-        dataset_buffer_mz[buf_id] = C.momentum_z[id];
-        dataset_buffer_E[buf_id]  = C.Energy[id];
-        #ifdef DE
-        dataset_buffer_GE[buf_id] = C.GasEnergy[id];
-        #endif
-        #ifdef SCALAR
-        for (int ii=0; ii<NSCALARS; ii++) {
-          dataset_buffer_scalar[buf_id+ii*H.nx*H.nz] = C.scalar[id+ii*H.n_cells];
-        }
-        #endif
-        #ifdef MPI_CHOLLA
+    for (k = 0; k < H.nz_real; k++) {
+      for (i = 0; i < H.nx_real; i++) {
+        id     = cuda_utilities::compute1DIndex(i + H.n_ghost, yslice, k + H.n_ghost, H.nx, H.ny);
+        buf_id = k + i * H.nz_real;
+  #ifdef MHD
+        int id_xm1 = cuda_utilities::compute1DIndex(i + H.n_ghost - 1, yslice, k + H.n_ghost, H.nx, H.ny);
+        int id_ym1 = cuda_utilities::compute1DIndex(i + H.n_ghost, yslice - 1, k + H.n_ghost, H.nx, H.ny);
+        int id_zm1 = cuda_utilities::compute1DIndex(i + H.n_ghost, yslice, k + H.n_ghost - 1, H.nx, H.ny);
+  #endif  // MHD
+  #ifdef MPI_CHOLLA
+        // When there are multiple processes, check whether this slice is in
+        // your domain
+        if (yslice >= ny_local_start && yslice < ny_local_start + ny_local) {
+          id = cuda_utilities::compute1DIndex(i + H.n_ghost, yslice - ny_local_start + H.n_ghost, k + H.n_ghost, H.nx,
+                                              H.ny);
+    #ifdef MHD
+          int id_xm1 = cuda_utilities::compute1DIndex(i + H.n_ghost - 1, yslice - ny_local_start + H.n_ghost,
+                                                      k + H.n_ghost, H.nx, H.ny);
+          int id_ym1 = cuda_utilities::compute1DIndex(i + H.n_ghost, yslice - ny_local_start + H.n_ghost - 1,
+                                                      k + H.n_ghost, H.nx, H.ny);
+          int id_zm1 = cuda_utilities::compute1DIndex(i + H.n_ghost, yslice - ny_local_start + H.n_ghost,
+                                                      k + H.n_ghost - 1, H.nx, H.ny);
+    #endif  // MHD
+  #endif    // MPI_CHOLLA
+          dataset_buffer_d[buf_id]  = C.density[id];
+          dataset_buffer_mx[buf_id] = C.momentum_x[id];
+          dataset_buffer_my[buf_id] = C.momentum_y[id];
+          dataset_buffer_mz[buf_id] = C.momentum_z[id];
+          dataset_buffer_E[buf_id]  = C.Energy[id];
+  #ifdef MHD
+          dataset_buffer_magnetic_x[buf_id] = 0.5 * (C.magnetic_x[id] + C.magnetic_x[id_xm1]);
+          dataset_buffer_magnetic_y[buf_id] = 0.5 * (C.magnetic_y[id] + C.magnetic_y[id_ym1]);
+          dataset_buffer_magnetic_z[buf_id] = 0.5 * (C.magnetic_z[id] + C.magnetic_z[id_zm1]);
+  #endif  // MHD
+  #ifdef DE
+          dataset_buffer_GE[buf_id] = C.GasEnergy[id];
+  #endif
+  #ifdef SCALAR
+          for (int ii = 0; ii < NSCALARS; ii++) {
+            dataset_buffer_scalar[buf_id + ii * H.nx * H.nz] = C.scalar[id + ii * H.n_cells];
+          }
+  #endif
+  #ifdef MPI_CHOLLA
         }
         // if the slice isn't in your domain, just write out zeros
         else {
@@ -2124,31 +2066,41 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
           dataset_buffer_my[buf_id] = 0;
           dataset_buffer_mz[buf_id] = 0;
           dataset_buffer_E[buf_id]  = 0;
-          #ifdef DE
+    #ifdef MHD
+          dataset_buffer_magnetic_x[buf_id] = 0;
+          dataset_buffer_magnetic_y[buf_id] = 0;
+          dataset_buffer_magnetic_z[buf_id] = 0;
+    #endif  // MHD
+    #ifdef DE
           dataset_buffer_GE[buf_id] = 0;
-          #endif
-          #ifdef SCALAR
-          for (int ii=0; ii<NSCALARS; ii++) {
-            dataset_buffer_scalar[buf_id+ii*H.nx*H.nz] = 0;
+    #endif
+    #ifdef SCALAR
+          for (int ii = 0; ii < NSCALARS; ii++) {
+            dataset_buffer_scalar[buf_id + ii * H.nx * H.nz] = 0;
           }
-          #endif
+    #endif
         }
-        #endif // MPI_CHOLLA
+  #endif  // MPI_CHOLLA
       }
     }
 
     // Write out the xz datasets for each variable
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_xz");
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_xz");
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_xz");
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_xz");
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_xz");
-    #ifdef DE
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_xz");
-    #endif
-    #ifdef SCALAR
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, "/scalar_xz");
-    #endif
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_xz");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_xz");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_xz");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_xz");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_xz");
+  #ifdef MHD
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_magnetic_x.data(), "/magnetic_x_xz");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_magnetic_y.data(), "/magnetic_y_xz");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_magnetic_z.data(), "/magnetic_z_xz");
+  #endif  // MHD
+  #ifdef DE
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_xz");
+  #endif
+  #ifdef SCALAR
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, "/scalar_xz");
+  #endif
 
     // Free the dataspace id
     status = H5Sclose(dataspace_id);
@@ -2159,57 +2111,79 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
     free(dataset_buffer_my);
     free(dataset_buffer_mz);
     free(dataset_buffer_E);
-    #ifdef DE
+  #ifdef DE
     free(dataset_buffer_GE);
-    #endif
-    #ifdef SCALAR
+  #endif
+  #ifdef SCALAR
     free(dataset_buffer_scalar);
-    #endif
-
+  #endif
 
     // Create the yz data space for the datasets
-    dims[0] = ny_dset;
-    dims[1] = nz_dset;
+    dims[0]      = ny_dset;
+    dims[1]      = nz_dset;
     dataspace_id = H5Screate_simple(2, dims, NULL);
 
     // allocate the memory for the yz slices
-    dataset_buffer_d  = (Real *) malloc(H.ny_real*H.nz_real*sizeof(Real));
-    dataset_buffer_mx = (Real *) malloc(H.ny_real*H.nz_real*sizeof(Real));
-    dataset_buffer_my = (Real *) malloc(H.ny_real*H.nz_real*sizeof(Real));
-    dataset_buffer_mz = (Real *) malloc(H.ny_real*H.nz_real*sizeof(Real));
-    dataset_buffer_E  = (Real *) malloc(H.ny_real*H.nz_real*sizeof(Real));
-    #ifdef DE
-    dataset_buffer_GE = (Real *) malloc(H.ny_real*H.nz_real*sizeof(Real));
-    #endif
-    #ifdef SCALAR
-    dataset_buffer_scalar = (Real *) malloc(NSCALARS*H.ny_real*H.nz_real*sizeof(Real));
-    #endif
-
+    dataset_buffer_d  = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real));
+    dataset_buffer_mx = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real));
+    dataset_buffer_my = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real));
+    dataset_buffer_mz = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real));
+    dataset_buffer_E  = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real));
+  #ifdef MHD
+    dataset_buffer_magnetic_x.resize(H.ny_real * H.nz_real);
+    dataset_buffer_magnetic_y.resize(H.ny_real * H.nz_real);
+    dataset_buffer_magnetic_z.resize(H.ny_real * H.nz_real);
+  #endif  // MHD
+  #ifdef DE
+    dataset_buffer_GE = (Real *)malloc(H.ny_real * H.nz_real * sizeof(Real));
+  #endif
+  #ifdef SCALAR
+    dataset_buffer_scalar = (Real *)malloc(NSCALARS * H.ny_real * H.nz_real * sizeof(Real));
+  #endif
 
     // Copy the yz slices to the memory buffers
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        id = xslice + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-        buf_id = k + j*H.nz_real;
-        #ifdef MPI_CHOLLA
-        // When there are multiple processes, check whether this slice is in your domain
-        if (xslice >= nx_local_start && xslice < nx_local_start+nx_local) {
-          id = (xslice-nx_local_start) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-        #endif //MPI_CHOLLA
-        dataset_buffer_d[buf_id]  = C.density[id];
-        dataset_buffer_mx[buf_id] = C.momentum_x[id];
-        dataset_buffer_my[buf_id] = C.momentum_y[id];
-        dataset_buffer_mz[buf_id] = C.momentum_z[id];
-        dataset_buffer_E[buf_id]  = C.Energy[id];
-        #ifdef DE
-        dataset_buffer_GE[buf_id] = C.GasEnergy[id];
-        #endif
-        #ifdef SCALAR
-        for (int ii=0; ii<NSCALARS; ii++) {
-          dataset_buffer_scalar[buf_id+ii*H.ny*H.nz] = C.scalar[id+ii*H.n_cells];
-        }
-        #endif
-        #ifdef MPI_CHOLLA
+    for (k = 0; k < H.nz_real; k++) {
+      for (j = 0; j < H.ny_real; j++) {
+        id     = cuda_utilities::compute1DIndex(xslice, j + H.n_ghost, k + H.n_ghost, H.nx, H.ny);
+        buf_id = k + j * H.nz_real;
+  #ifdef MHD
+        int id_xm1 = cuda_utilities::compute1DIndex(xslice - 1, j + H.n_ghost, k + H.n_ghost, H.nx, H.ny);
+        int id_ym1 = cuda_utilities::compute1DIndex(xslice, j + H.n_ghost - 1, k + H.n_ghost, H.nx, H.ny);
+        int id_zm1 = cuda_utilities::compute1DIndex(xslice, j + H.n_ghost, k + H.n_ghost - 1, H.nx, H.ny);
+  #endif  // MHD
+  #ifdef MPI_CHOLLA
+        // When there are multiple processes, check whether this slice is in
+        // your domain
+        if (xslice >= nx_local_start && xslice < nx_local_start + nx_local) {
+          id = cuda_utilities::compute1DIndex(xslice - nx_local_start, j + H.n_ghost, k + H.n_ghost, H.nx, H.ny);
+    #ifdef MHD
+          int id_xm1 =
+              cuda_utilities::compute1DIndex(xslice - nx_local_start - 1, j + H.n_ghost, k + H.n_ghost, H.nx, H.ny);
+          int id_ym1 =
+              cuda_utilities::compute1DIndex(xslice - nx_local_start, j + H.n_ghost - 1, k + H.n_ghost, H.nx, H.ny);
+          int id_zm1 =
+              cuda_utilities::compute1DIndex(xslice - nx_local_start, j + H.n_ghost, k + H.n_ghost - 1, H.nx, H.ny);
+    #endif  // MHD
+  #endif    // MPI_CHOLLA
+          dataset_buffer_d[buf_id]  = C.density[id];
+          dataset_buffer_mx[buf_id] = C.momentum_x[id];
+          dataset_buffer_my[buf_id] = C.momentum_y[id];
+          dataset_buffer_mz[buf_id] = C.momentum_z[id];
+          dataset_buffer_E[buf_id]  = C.Energy[id];
+  #ifdef MHD
+          dataset_buffer_magnetic_x[buf_id] = 0.5 * (C.magnetic_x[id] + C.magnetic_x[id_xm1]);
+          dataset_buffer_magnetic_y[buf_id] = 0.5 * (C.magnetic_y[id] + C.magnetic_y[id_ym1]);
+          dataset_buffer_magnetic_z[buf_id] = 0.5 * (C.magnetic_z[id] + C.magnetic_z[id_zm1]);
+  #endif  // MHD
+  #ifdef DE
+          dataset_buffer_GE[buf_id] = C.GasEnergy[id];
+  #endif
+  #ifdef SCALAR
+          for (int ii = 0; ii < NSCALARS; ii++) {
+            dataset_buffer_scalar[buf_id + ii * H.ny * H.nz] = C.scalar[id + ii * H.n_cells];
+          }
+  #endif
+  #ifdef MPI_CHOLLA
         }
         // if the slice isn't in your domain, just write out zeros
         else {
@@ -2218,32 +2192,41 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
           dataset_buffer_my[buf_id] = 0;
           dataset_buffer_mz[buf_id] = 0;
           dataset_buffer_E[buf_id]  = 0;
-          #ifdef DE
+    #ifdef MHD
+          dataset_buffer_magnetic_x[buf_id] = 0;
+          dataset_buffer_magnetic_y[buf_id] = 0;
+          dataset_buffer_magnetic_z[buf_id] = 0;
+    #endif  // MHD
+    #ifdef DE
           dataset_buffer_GE[buf_id] = 0;
-          #endif
-          #ifdef SCALAR
-          for (int ii=0; ii<NSCALARS; ii++) {
-            dataset_buffer_scalar[buf_id+ii*H.ny*H.nz] = 0;
+    #endif
+    #ifdef SCALAR
+          for (int ii = 0; ii < NSCALARS; ii++) {
+            dataset_buffer_scalar[buf_id + ii * H.ny * H.nz] = 0;
           }
-          #endif
+    #endif
         }
-        #endif // MPI_CHOLLA
+  #endif  // MPI_CHOLLA
       }
     }
 
-
     // Write out the yz datasets for each variable
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_yz");
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_yz");
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_yz");
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_yz");
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_yz");
-    #ifdef DE
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_yz");
-    #endif
-    #ifdef SCALAR
-    status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, "/scalar_yz");
-    #endif
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_d, "/d_yz");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mx, "/mx_yz");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_my, "/my_yz");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_mz, "/mz_yz");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_E, "/E_yz");
+  #ifdef MHD
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_magnetic_x.data(), "/magnetic_x_yz");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_magnetic_y.data(), "/magnetic_y_yz");
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_magnetic_z.data(), "/magnetic_z_yz");
+  #endif  // MHD
+  #ifdef DE
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_GE, "/GE_yz");
+  #endif
+  #ifdef SCALAR
+    status = Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer_scalar, "/scalar_yz");
+  #endif
 
     // Free the dataspace id
     status = H5Sclose(dataspace_id);
@@ -2254,51 +2237,49 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id)
     free(dataset_buffer_my);
     free(dataset_buffer_mz);
     free(dataset_buffer_E);
-    #ifdef DE
+  #ifdef DE
     free(dataset_buffer_GE);
-    #endif
-    #ifdef SCALAR
+  #endif
+  #ifdef SCALAR
     free(dataset_buffer_scalar);
-    #endif
-
+  #endif
 
+  } else {
+    printf("Slice write only works for 3D data.\n");
   }
-  else printf("Slice write only works for 3D data.\n");
-
 }
-#endif //HDF5
-
+#endif  // HDF5
 
-/*! \fn void Read_Grid(struct parameters P)
+/*! \fn void Read_Grid(struct Parameters P)
  *  \brief Read in grid data from an output file. */
-void Grid3D::Read_Grid(struct parameters P) {
-
-  char filename[100];
-  char timestep[20];
-  int nfile = P.nfile; //output step you want to read from
+void Grid3D::Read_Grid(struct Parameters P)
+{
+  ScopedTimer timer("Read_Grid");
+  int nfile = P.nfile;  // output step you want to read from
 
   // create the filename to read from
   // assumes your data is in the outdir specified in the input file
   // strcpy(filename, P.outdir);
   // Changed to read initial conditions from indir
-  strcpy(filename, P.indir);
-  sprintf(timestep, "%d", nfile);
-  strcat(filename,timestep);
-  #if defined BINARY
-  strcat(filename,".bin");
-  #elif defined HDF5
-  strcat(filename,".h5");
-  #endif  // BINARY or HDF5
-  // for now assumes you will run on the same number of processors
-  #ifdef MPI_CHOLLA
+  std::string filename(P.indir);
+  filename += std::to_string(P.nfile);
+
+#if defined BINARY
+  filename += ".bin";
+#elif defined HDF5
+  filename += ".h5";
+#endif  // BINARY or HDF5
+// for now assumes you will run on the same number of processors
+#ifdef MPI_CHOLLA
   #ifdef TILED_INITIAL_CONDITIONS
-  sprintf(filename,"%sics_%dMpc_%d.h5", P.indir, (int) P.tile_length/1000, H.nx_real); //Everyone reads the same file
-  #else  // TILED_INITIAL_CONDITIONS is not defined
-  sprintf(filename,"%s.%d",filename,procID);
-  #endif //TILED_INITIAL_CONDITIONS
-  #endif  //MPI_CHOLLA
-
-  #if defined BINARY
+  sprintf(filename, "%sics_%dMpc_%d.h5", P.indir, (int)P.tile_length / 1000,
+          H.nx_real);  // Everyone reads the same file
+  #else                // TILED_INITIAL_CONDITIONS is not defined
+  filename += "." + std::to_string(procID);
+  #endif               // TILED_INITIAL_CONDITIONS
+#endif                 // MPI_CHOLLA
+
+#if defined BINARY
   FILE *fp;
   // open the file
   fp = fopen(filename, "r");
@@ -2313,14 +2294,14 @@ void Grid3D::Read_Grid(struct parameters P) {
   // close the file
   fclose(fp);
 
-  #elif defined HDF5
-  hid_t  file_id;
-  herr_t  status;
+#elif defined HDF5
+  hid_t file_id;
+  herr_t status;
 
   // open the file
-  file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT);
+  file_id = H5Fopen(filename.data(), H5F_ACC_RDONLY, H5P_DEFAULT);
   if (file_id < 0) {
-    printf("Unable to open input file: %s\n", filename);
+    std::cout << "Unable to open input file: " << filename << std::endl;
     exit(0);
   }
 
@@ -2329,12 +2310,9 @@ void Grid3D::Read_Grid(struct parameters P) {
 
   // close the file
   status = H5Fclose(file_id);
-  #endif  // BINARY or HDF5
-
-
+#endif  // BINARY or HDF5
 }
 
-
 /*! \fn Read_Grid_Binary(FILE *fp)
  *  \brief Read in grid data from a binary file. */
 void Grid3D::Read_Grid_Binary(FILE *fp)
@@ -2366,960 +2344,389 @@ void Grid3D::Read_Grid_Binary(FILE *fp)
   rs = fread(&H.t, sizeof(Real), 1, fp);
   rs = fread(&H.dt, sizeof(Real), 1, fp);
   rs = fread(&H.t_wall, sizeof(Real), 1, fp);
-  rs =fread(&H.n_step, sizeof(int), 1, fp);
-
+  rs = fread(&H.n_step, sizeof(int), 1, fp);
 
-  // Read in the conserved quantities from the input file
-  #ifdef WITH_GHOST
-  fread(&(C.density[id]),    sizeof(Real), H.n_cells, fp);
+// Read in the conserved quantities from the input file
+#ifdef WITH_GHOST
+  fread(&(C.density[id]), sizeof(Real), H.n_cells, fp);
   fread(&(C.momentum_x[id]), sizeof(Real), H.n_cells, fp);
   fread(&(C.momentum_y[id]), sizeof(Real), H.n_cells, fp);
   fread(&(C.momentum_z[id]), sizeof(Real), H.n_cells, fp);
-  fread(&(C.Energy[id]),     sizeof(Real), H.n_cells, fp);
-  #endif //WITH_GHOST
+  fread(&(C.Energy[id]), sizeof(Real), H.n_cells, fp);
+#endif  // WITH_GHOST
 
-  #ifdef NO_GHOST
+#ifdef NO_GHOST
   // 1D case
-  if (H.nx>1 && H.ny==1 && H.nz==1) {
-
+  if (H.nx > 1 && H.ny == 1 && H.nz == 1) {
     id = H.n_ghost;
 
-    fread(&(C.density[id]),    sizeof(Real), H.nx_real, fp);
+    fread(&(C.density[id]), sizeof(Real), H.nx_real, fp);
     fread(&(C.momentum_x[id]), sizeof(Real), H.nx_real, fp);
     fread(&(C.momentum_y[id]), sizeof(Real), H.nx_real, fp);
     fread(&(C.momentum_z[id]), sizeof(Real), H.nx_real, fp);
-    fread(&(C.Energy[id]),     sizeof(Real), H.nx_real, fp);
-    #ifdef DE
-    fread(&(C.GasEnergy[id]),  sizeof(Real), H.nx_real, fp);
-    #endif
+    fread(&(C.Energy[id]), sizeof(Real), H.nx_real, fp);
+  #ifdef DE
+    fread(&(C.GasEnergy[id]), sizeof(Real), H.nx_real, fp);
+  #endif
   }
 
   // 2D case
-  else if (H.nx>1 && H.ny>1 && H.nz==1) {
-    for (j=0; j<H.ny_real; j++) {
-      id = H.n_ghost + (j+H.n_ghost)*H.nx;
+  else if (H.nx > 1 && H.ny > 1 && H.nz == 1) {
+    for (j = 0; j < H.ny_real; j++) {
+      id = H.n_ghost + (j + H.n_ghost) * H.nx;
       fread(&(C.density[id]), sizeof(Real), H.nx_real, fp);
     }
-    for (j=0; j<H.ny_real; j++) {
-      id = H.n_ghost + (j+H.n_ghost)*H.nx;
+    for (j = 0; j < H.ny_real; j++) {
+      id = H.n_ghost + (j + H.n_ghost) * H.nx;
       fread(&(C.momentum_x[id]), sizeof(Real), H.nx_real, fp);
     }
-    for (j=0; j<H.ny_real; j++) {
-      id = H.n_ghost + (j+H.n_ghost)*H.nx;
+    for (j = 0; j < H.ny_real; j++) {
+      id = H.n_ghost + (j + H.n_ghost) * H.nx;
       fread(&(C.momentum_y[id]), sizeof(Real), H.nx_real, fp);
     }
-    for (j=0; j<H.ny_real; j++) {
-      id = H.n_ghost + (j+H.n_ghost)*H.nx;
+    for (j = 0; j < H.ny_real; j++) {
+      id = H.n_ghost + (j + H.n_ghost) * H.nx;
       fread(&(C.momentum_z[id]), sizeof(Real), H.nx_real, fp);
     }
-    for (j=0; j<H.ny_real; j++) {
-      id = H.n_ghost + (j+H.n_ghost)*H.nx;
+    for (j = 0; j < H.ny_real; j++) {
+      id = H.n_ghost + (j + H.n_ghost) * H.nx;
       fread(&(C.Energy[id]), sizeof(Real), H.nx_real, fp);
     }
-    #ifdef DE
-    for (j=0; j<H.ny_real; j++) {
-      id = H.n_ghost + (j+H.n_ghost)*H.nx;
+  #ifdef DE
+    for (j = 0; j < H.ny_real; j++) {
+      id = H.n_ghost + (j + H.n_ghost) * H.nx;
       fread(&(C.GasEnergy[id]), sizeof(Real), H.nx_real, fp);
     }
-    #endif
+  #endif
   }
 
   // 3D case
   else {
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        id = H.n_ghost + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
+    for (k = 0; k < H.nz_real; k++) {
+      for (j = 0; j < H.ny_real; j++) {
+        id = H.n_ghost + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny;
         fread(&(C.density[id]), sizeof(Real), H.nx_real, fp);
       }
     }
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        id = H.n_ghost + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
+    for (k = 0; k < H.nz_real; k++) {
+      for (j = 0; j < H.ny_real; j++) {
+        id = H.n_ghost + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny;
         fread(&(C.momentum_x[id]), sizeof(Real), H.nx_real, fp);
       }
     }
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        id = H.n_ghost + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
+    for (k = 0; k < H.nz_real; k++) {
+      for (j = 0; j < H.ny_real; j++) {
+        id = H.n_ghost + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny;
         fread(&(C.momentum_y[id]), sizeof(Real), H.nx_real, fp);
       }
     }
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        id = H.n_ghost + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
+    for (k = 0; k < H.nz_real; k++) {
+      for (j = 0; j < H.ny_real; j++) {
+        id = H.n_ghost + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny;
         fread(&(C.momentum_z[id]), sizeof(Real), H.nx_real, fp);
       }
     }
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        id = H.n_ghost + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
+    for (k = 0; k < H.nz_real; k++) {
+      for (j = 0; j < H.ny_real; j++) {
+        id = H.n_ghost + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny;
         fread(&(C.Energy[id]), sizeof(Real), H.nx_real, fp);
       }
     }
-    #ifdef DE
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        id = H.n_ghost + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
+  #ifdef DE
+    for (k = 0; k < H.nz_real; k++) {
+      for (j = 0; j < H.ny_real; j++) {
+        id = H.n_ghost + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny;
         fread(&(C.GasEnergy[id]), sizeof(Real), H.nx_real, fp);
       }
     }
-    #endif
-
-  }
   #endif
-
+  }
+#endif
 }
 
-
-
 #ifdef HDF5
-/*! \fn void Read_Grid_HDF5(hid_t file_id)
- *  \brief Read in grid data from an hdf5 file. */
-void Grid3D::Read_Grid_HDF5(hid_t file_id, struct parameters P)
-{
-  int i, j, k, id, buf_id;
-  hid_t     attribute_id, dataset_id;
-  Real      *dataset_buffer;
-  herr_t    status;
-
-  // Read in header values not set by grid initialization
-  attribute_id = H5Aopen(file_id, "gamma", H5P_DEFAULT);
-  status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &gama);
-  status = H5Aclose(attribute_id);
-  attribute_id = H5Aopen(file_id, "t", H5P_DEFAULT);
-  status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &H.t);
-  status = H5Aclose(attribute_id);
-  attribute_id = H5Aopen(file_id, "dt", H5P_DEFAULT);
-  status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &H.dt);
-  status = H5Aclose(attribute_id);
-  attribute_id = H5Aopen(file_id, "n_step", H5P_DEFAULT);
-  status = H5Aread(attribute_id, H5T_NATIVE_INT, &H.n_step);
-  status = H5Aclose(attribute_id);
-
-  // 1D case
-  if (H.nx>1 && H.ny==1 && H.nz==1) {
-
-    // need a dataset buffer to remap fastest index
-    dataset_buffer = (Real *) malloc(H.nx_real*sizeof(Real));
-
-    // Open the density dataset
-    dataset_id = H5Dopen(file_id, "/density", H5P_DEFAULT);
-    // Read the density array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Copy the density array to the grid
-    id = H.n_ghost;
-    memcpy(&(C.density[id]), &dataset_buffer[0], H.nx_real*sizeof(Real));
-
-
-    // Open the x momentum dataset
-    dataset_id = H5Dopen(file_id, "/momentum_x", H5P_DEFAULT);
-    // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Copy the x momentum array to the grid
-    id = H.n_ghost;
-    memcpy(&(C.momentum_x[id]), &dataset_buffer[0], H.nx_real*sizeof(Real));
-
-
-    // Open the y momentum dataset
-    dataset_id = H5Dopen(file_id, "/momentum_y", H5P_DEFAULT);
-    // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Copy the y momentum array to the grid
-    id = H.n_ghost;
-    memcpy(&(C.momentum_y[id]), &dataset_buffer[0], H.nx_real*sizeof(Real));
 
+/* \brief After HDF5 reads data into a buffer, remap and write to grid buffer. */
+void Fill_Grid_From_HDF5_Buffer(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost,
+                                Real *hdf5_buffer, Real *grid_buffer)
+{
+  // Note: for 1D ny_real and nz_real are not used
+  // And for 2D nz_real is not used.
+  // This protects the magnetic case where ny_real/nz_real += 1
 
-    // Open the z momentum dataset
-    dataset_id = H5Dopen(file_id, "/momentum_z", H5P_DEFAULT);
-    // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Copy the z momentum array to the grid
-    id = H.n_ghost;
-    memcpy(&(C.momentum_z[id]), &dataset_buffer[0], H.nx_real*sizeof(Real));
-
-
-    // Open the Energy dataset
-    dataset_id = H5Dopen(file_id, "/Energy", H5P_DEFAULT);
-    // Read the Energy array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Copy the Energy array to the grid
-    id = H.n_ghost;
-    memcpy(&(C.Energy[id]), &dataset_buffer[0], H.nx_real*sizeof(Real));
-
-
-    #ifdef DE
-    // Open the internal energy dataset
-    dataset_id = H5Dopen(file_id, "/GasEnergy", H5P_DEFAULT);
-    // Read the Energy array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Copy the internal energy array to the grid
-    id = H.n_ghost;
-    memcpy(&(C.GasEnergy[id]), &dataset_buffer[0], H.nx_real*sizeof(Real));
-    #endif  //DE
-
-    #ifdef SCALAR
-    for (int s=0; s<NSCALARS; s++) {
-      // create the name of the dataset
-      char dataset[100];
-      char number[10];
-      strcpy(dataset, "/scalar");
-      sprintf(number, "%d", s);
-      strcat(dataset,number);
-
-      // Open the passive scalar dataset
-      dataset_id = H5Dopen(file_id, dataset, H5P_DEFAULT);
-      // Read the scalar array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-      status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      // Free the dataset id
-      status = H5Dclose(dataset_id);
-
-      // Copy the scalar array to the grid
-      id = H.n_ghost;
-      memcpy(&(C.scalar[id + s*H.n_cells]), &dataset_buffer[0], H.nx_real*sizeof(Real));
+  int i, j, k, id, buf_id;
+  // 3D case
+  if (nx > 1 && ny > 1 && nz > 1) {
+    for (k = 0; k < nz_real; k++) {
+      for (j = 0; j < ny_real; j++) {
+        for (i = 0; i < nx_real; i++) {
+          id              = (i + n_ghost) + (j + n_ghost) * nx + (k + n_ghost) * nx * ny;
+          buf_id          = k + j * nz_real + i * nz_real * ny_real;
+          grid_buffer[id] = hdf5_buffer[buf_id];
+        }
+      }
     }
-    #endif  //SCALAR
+    return;
   }
 
   // 2D case
-  if (H.nx>1 && H.ny>1 && H.nz==1) {
-
-    // need a dataset buffer to remap fastest index
-    dataset_buffer = (Real *) malloc(H.ny_real*H.nx_real*sizeof(Real));
-
-
-    // Open the density dataset
-    dataset_id = H5Dopen(file_id, "/density", H5P_DEFAULT);
-    // Read the density array into the dataset buffer  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Copy the density array to the grid
-    for (j=0; j<H.ny_real; j++) {
-      for (i=0; i<H.nx_real; i++) {
-        id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx;
-        buf_id = j + i*H.ny_real;
-        C.density[id] = dataset_buffer[buf_id];
-      }
-    }
-
-
-    // Open the x momentum dataset
-    dataset_id = H5Dopen(file_id, "/momentum_x", H5P_DEFAULT);
-    // Read the x momentum array into the dataset buffer  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Copy the x momentum array to the grid
-    for (j=0; j<H.ny_real; j++) {
-      for (i=0; i<H.nx_real; i++) {
-        id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx;
-        buf_id = j + i*H.ny_real;
-        C.momentum_x[id] = dataset_buffer[buf_id];
-      }
-    }
-
-
-    // Open the y momentum dataset
-    dataset_id = H5Dopen(file_id, "/momentum_y", H5P_DEFAULT);
-    // Read the y momentum array into the dataset buffer  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Copy the y momentum array to the grid
-    for (j=0; j<H.ny_real; j++) {
-      for (i=0; i<H.nx_real; i++) {
-        id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx;
-        buf_id = j + i*H.ny_real;
-        C.momentum_y[id] = dataset_buffer[buf_id];
+  if (nx > 1 && ny > 1 && nz == 1) {
+    for (j = 0; j < ny_real; j++) {
+      for (i = 0; i < nx_real; i++) {
+        id              = (i + n_ghost) + (j + n_ghost) * nx;
+        buf_id          = j + i * ny_real;
+        grid_buffer[id] = hdf5_buffer[buf_id];
       }
     }
+    return;
+  }
 
+  // 1D case
+  if (nx > 1 && ny == 1 && nz == 1) {
+    id = n_ghost;
+    memcpy(&grid_buffer[id], &hdf5_buffer[0], nx_real * sizeof(Real));
+    return;
+  }
+}
 
-    // Open the z momentum dataset
-    dataset_id = H5Dopen(file_id, "/momentum_z", H5P_DEFAULT);
-    // Read the z momentum array into the dataset buffer  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    // Copy the z momentum array to the grid
-    for (j=0; j<H.ny_real; j++) {
-      for (i=0; i<H.nx_real; i++) {
-        id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx;
-        buf_id = j + i*H.ny_real;
-        C.momentum_z[id] = dataset_buffer[buf_id];
-      }
-    }
-
+void Read_Grid_HDF5_Field(hid_t file_id, Real *dataset_buffer, Header H, Real *grid_buffer, const char *name)
+{
+  Read_HDF5_Dataset(file_id, dataset_buffer, name);
+  Fill_Grid_From_HDF5_Buffer(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, H.n_ghost, dataset_buffer, grid_buffer);
+}
 
-    // Open the Energy dataset
-    dataset_id = H5Dopen(file_id, "/Energy", H5P_DEFAULT);
-    // Read the Energy array into the dataset buffer  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
+void Read_Grid_HDF5_Field_Magnetic(hid_t file_id, Real *dataset_buffer, Header H, Real *grid_buffer, const char *name)
+{
+  // Magnetic has 1 more real cell, 1 fewer n_ghost on one side.
+  Read_HDF5_Dataset(file_id, dataset_buffer, name);
+  Fill_Grid_From_HDF5_Buffer(H.nx, H.ny, H.nz, H.nx_real + 1, H.ny_real + 1, H.nz_real + 1, H.n_ghost - 1,
+                             dataset_buffer, grid_buffer);
+}
 
-    // Copy the Energy array to the grid
-    for (j=0; j<H.ny_real; j++) {
-      for (i=0; i<H.nx_real; i++) {
-        id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx;
-        buf_id = j + i*H.ny_real;
-        C.Energy[id] = dataset_buffer[buf_id];
-      }
-    }
+/*! \fn void Read_Grid_HDF5(hid_t file_id)
+ *  \brief Read in grid data from an hdf5 file. */
+void Grid3D::Read_Grid_HDF5(hid_t file_id, struct Parameters P)
+{
+  int i, j, k, id, buf_id;
+  hid_t attribute_id, dataset_id;
+  Real *dataset_buffer;
+  herr_t status;
 
+  // Read in header values not set by grid initialization
+  attribute_id = H5Aopen(file_id, "gamma", H5P_DEFAULT);
+  status       = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &gama);
+  status       = H5Aclose(attribute_id);
+  attribute_id = H5Aopen(file_id, "t", H5P_DEFAULT);
+  status       = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &H.t);
+  status       = H5Aclose(attribute_id);
+  attribute_id = H5Aopen(file_id, "n_step", H5P_DEFAULT);
+  status       = H5Aread(attribute_id, H5T_NATIVE_INT, &H.n_step);
+  status       = H5Aclose(attribute_id);
 
-    #ifdef DE
-    // Open the internal energy dataset
-    dataset_id = H5Dopen(file_id, "/GasEnergy", H5P_DEFAULT);
-    // Read the internal energy array into the dataset buffer  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
+  #ifdef MHD
+  dataset_buffer = (Real *)malloc((H.nz_real + 1) * (H.ny_real + 1) * (H.nx_real + 1) * sizeof(Real));
+  #else
+  dataset_buffer = (Real *)malloc((H.nz_real) * (H.ny_real) * (H.nx_real) * sizeof(Real));
+  #endif
 
-    // Copy the internal energy array to the grid
-    for (j=0; j<H.ny_real; j++) {
-      for (i=0; i<H.nx_real; i++) {
-        id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx;
-        buf_id = j + i*H.ny_real;
-        C.GasEnergy[id] = dataset_buffer[buf_id];
-      }
-    }
-    #endif  //DE
+  Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.density, "/density");
+  Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.momentum_x, "/momentum_x");
+  Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.momentum_y, "/momentum_y");
+  Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.momentum_z, "/momentum_z");
+  Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.Energy, "/Energy");
+  #ifdef DE
+  Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.GasEnergy, "/GasEnergy");
+  #endif
 
+  #ifdef SCALAR
 
-    #ifdef SCALAR
-    for (int s=0; s<NSCALARS; s++) {
-      // create the name of the dataset
-      char dataset[100];
-      char number[10];
-      strcpy(dataset, "/scalar");
-      sprintf(number, "%d", s);
-      strcat(dataset,number);
-
-      // Open the scalar dataset
-      dataset_id = H5Dopen(file_id, dataset, H5P_DEFAULT);
-      // Read the scalar array into the dataset buffer  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-      status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      // Free the dataset id
-      status = H5Dclose(dataset_id);
-
-      // Copy the scalar array to the grid
-      for (j=0; j<H.ny_real; j++) {
-        for (i=0; i<H.nx_real; i++) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx;
-          buf_id = j + i*H.ny_real;
-          C.scalar[id+s*H.n_cells] = dataset_buffer[buf_id];
-        }
-      }
-    }
-    #endif  //SCALAR
-  }
+    #ifdef BASIC_SCALAR
+  Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.scalar, "/scalar0");
+    #endif  // BASIC_SCALAR
+
+    #ifdef DUST
+  Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.dust_density, "/dust_density");
+    #endif  // DUST
+
+    #if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU)
+  Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.HI_density, "/HI_density");
+  Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.HII_density, "/HII_density");
+  Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.HeI_density, "/HeI_density");
+  Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.HeII_density, "/HeII_density");
+  Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.HeIII_density, "/HeIII_density");
+  Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.e_density, "/e_density");
+      #ifdef GRACKLE_METALS
+  Read_Grid_HDF5_Field(file_id, dataset_buffer, H, C.metal_density, "/metal_density");
+      #endif  // GRACKLE_METALS
+    #endif    // COOLING_GRACKLE , CHEMISTRY_GPU
 
-  // 3D case
-  if (H.nx>1 && H.ny>1 && H.nz>1) {
+  #endif  // SCALAR
 
+  // MHD only valid in 3D case
+  if (H.nx > 1 && H.ny > 1 && H.nz > 1) {
     // Compute Statistic of Initial data
     Real mean_l, min_l, max_l;
     Real mean_g, min_g, max_g;
 
-    // need a dataset buffer to remap fastest index
-    dataset_buffer = (Real *) malloc(H.nz_real*H.ny_real*H.nx_real*sizeof(Real));
-
-
-    // Open the density dataset
-    dataset_id = H5Dopen(file_id, "/density", H5P_DEFAULT);
-    // Read the density array into the dataset buffer  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-
-    mean_l = 0;
-    min_l = 1e65;
-    max_l = -1;
-
-    // Copy the density array to the grid
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        for (i=0; i<H.nx_real; i++) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-          buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-          C.density[id] = dataset_buffer[buf_id];
-          mean_l += C.density[id];
-          if ( C.density[id] > max_l ) max_l = C.density[id];
-          if ( C.density[id] < min_l ) min_l = C.density[id];
-        }
-      }
-    }
-    mean_l /= ( H.nz_real * H.ny_real * H.nx_real );
-
-    #if MPI_CHOLLA
-    mean_g = ReduceRealAvg( mean_l );
-    max_g = ReduceRealMax( max_l );
-    min_g = ReduceRealMin( min_l );
-    mean_l = mean_g;
-    max_l = max_g;
-    min_l = min_g;
-    #endif  //MPI_CHOLLA
-
-    #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY)
-    chprintf( " Density  Mean: %f   Min: %f   Max: %f      [ h^2 Msun kpc^-3] \n", mean_l, min_l, max_l );
-    #endif  //PRINT_INITIAL_STATS and COSMOLOGY
-
-
-    // Open the x momentum dataset
-    dataset_id = H5Dopen(file_id, "/momentum_x", H5P_DEFAULT);
-    // Read the x momentum array into the dataset buffer  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    mean_l = 0;
-    min_l = 1e65;
-    max_l = -1;
-    // Copy the x momentum array to the grid
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        for (i=0; i<H.nx_real; i++) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-          buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-          C.momentum_x[id] = dataset_buffer[buf_id];
-          mean_l += fabs(C.momentum_x[id]);
-          if ( fabs(C.momentum_x[id]) > max_l ) max_l = fabs(C.momentum_x[id]);
-          if ( fabs(C.momentum_x[id]) < min_l ) min_l = fabs(C.momentum_x[id]);
-        }
-      }
-    }
-    mean_l /= ( H.nz_real * H.ny_real * H.nx_real );
-
-    #if MPI_CHOLLA
-    mean_g = ReduceRealAvg( mean_l );
-    max_g = ReduceRealMax( max_l );
-    min_g = ReduceRealMin( min_l );
-    mean_l = mean_g;
-    max_l = max_g;
-    min_l = min_g;
-    #endif  //MPI_CHOLLA
-
-    #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY)
-    chprintf( " abs(Momentum X)  Mean: %f   Min: %f   Max: %f      [ h^2 Msun kpc^-3 km s^-1] \n", mean_l, min_l, max_l );
-    #endif  //PRINT_INITIAL_STATS and COSMOLOGY
-
-    // Open the y momentum dataset
-    dataset_id = H5Dopen(file_id, "/momentum_y", H5P_DEFAULT);
-    // Read the y momentum array into the dataset buffer  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-    status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    // Free the dataset id
-    status = H5Dclose(dataset_id);
-
-    mean_l = 0;
-    min_l = 1e65;
-    max_l = -1;
-    // Copy the y momentum array to the grid
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        for (i=0; i<H.nx_real; i++) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-          buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-          C.momentum_y[id] = dataset_buffer[buf_id];
-          mean_l += fabs(C.momentum_y[id]);
-          if ( fabs(C.momentum_y[id]) > max_l ) max_l = fabs(C.momentum_y[id]);
-          if ( fabs(C.momentum_y[id]) < min_l ) min_l = fabs(C.momentum_y[id]);
-        }
-      }
-    }
-    mean_l /= ( H.nz_real * H.ny_real * H.nx_real );
-
-    #if MPI_CHOLLA
-    mean_g = ReduceRealAvg( mean_l );
-    max_g = ReduceRealMax( max_l );
-    min_g = ReduceRealMin( min_l );
-    mean_l = mean_g;
-    max_l = max_g;
-    min_l = min_g;
-    #endif  //MPI_CHOLLA
-
-    #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY)
-    chprintf( " abs(Momentum Y)  Mean: %f   Min: %f   Max: %f      [ h^2 Msun kpc^-3 km s^-1] \n", mean_l, min_l, max_l );
-    #endif  //PRINT_INITIAL_STATS and COSMOLOGY
-
-
-    // Open the z momentum dataset
-    dataset_id = H5Dopen(file_id, "/momentum_z", H5P_DEFAULT);
-    // Read the z momentum array into the dataset buffer  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
+  #ifdef MHD
+    // Open the x magnetic field dataset
+    dataset_id = H5Dopen(file_id, "/magnetic_x", H5P_DEFAULT);
+    // Read the x magnetic field array into the dataset buffer  // NOTE: NEED TO
+    // FIX FOR FLOAT REAL!!!
     status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
     // Free the dataset id
     status = H5Dclose(dataset_id);
 
     mean_l = 0;
-    min_l = 1e65;
-    max_l = -1;
-    // Copy the z momentum array to the grid
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        for (i=0; i<H.nx_real; i++) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-          buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-          C.momentum_z[id] = dataset_buffer[buf_id];
-          mean_l += fabs(C.momentum_z[id]);
-          if ( fabs(C.momentum_z[id]) > max_l ) max_l = fabs(C.momentum_z[id]);
-          if ( fabs(C.momentum_z[id]) < min_l ) min_l = fabs(C.momentum_z[id]);
+    min_l  = 1e65;
+    max_l  = -1;
+    // Copy the x magnetic field array to the grid
+    for (k = 0; k < H.nz_real; k++) {
+      for (j = 0; j < H.ny_real; j++) {
+        for (i = 0; i < H.nx_real + 1; i++) {
+          id               = (i + H.n_ghost - 1) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny;
+          buf_id           = k + j * (H.nz_real) + i * (H.nz_real) * (H.ny_real);
+          C.magnetic_x[id] = dataset_buffer[buf_id];
+
+          mean_l += std::abs(C.magnetic_x[id]);
+          max_l = std::max(max_l, std::abs(C.magnetic_x[id]));
+          min_l = std::min(min_l, std::abs(C.magnetic_x[id]));
         }
       }
     }
-    mean_l /= ( H.nz_real * H.ny_real * H.nx_real );
+    mean_l /= ((H.nz_real + 1) * (H.ny_real) * (H.nx_real));
 
     #if MPI_CHOLLA
-    mean_g = ReduceRealAvg( mean_l );
-    max_g = ReduceRealMax( max_l );
-    min_g = ReduceRealMin( min_l );
+    mean_g = ReduceRealAvg(mean_l);
+    max_g  = ReduceRealMax(max_l);
+    min_g  = ReduceRealMin(min_l);
     mean_l = mean_g;
-    max_l = max_g;
-    min_l = min_g;
-    #endif  //MPI_CHOLLA
+    max_l  = max_g;
+    min_l  = min_g;
+    #endif  // MPI_CHOLLA
 
     #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY)
-    chprintf( " abs(Momentum Z)  Mean: %f   Min: %f   Max: %f      [ h^2 Msun kpc^-3 km s^-1] \n", mean_l, min_l, max_l );
-    #endif  //PRINT_INITIAL_STATS and COSMOLOGY
-
-
-    // Open the Energy dataset
-    dataset_id = H5Dopen(file_id, "/Energy", H5P_DEFAULT);
-    // Read the Energy array into the dataset buffer  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
+    chprintf(
+        " abs(Magnetic X)  Mean: %f   Min: %f   Max: %f      [ Msun^1/2 "
+        "kpc^-1/2 s^-1] \n",
+        mean_l, min_l, max_l);
+    #endif  // PRINT_INITIAL_STATS and COSMOLOGY
+
+    // Open the y magnetic field dataset
+    dataset_id = H5Dopen(file_id, "/magnetic_y", H5P_DEFAULT);
+    // Read the y magnetic field array into the dataset buffer  // NOTE: NEED TO
+    // FIX FOR FLOAT REAL!!!
     status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
     // Free the dataset id
     status = H5Dclose(dataset_id);
 
     mean_l = 0;
-    min_l = 1e65;
-    max_l = -1;
-    // Copy the Energy array to the grid
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        for (i=0; i<H.nx_real; i++) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-          buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-          C.Energy[id] = dataset_buffer[buf_id];
-          mean_l += C.Energy[id];
-          if ( C.Energy[id] > max_l ) max_l = C.Energy[id];
-          if ( C.Energy[id] < min_l ) min_l = C.Energy[id];
+    min_l  = 1e65;
+    max_l  = -1;
+    // Copy the y magnetic field array to the grid
+    for (k = 0; k < H.nz_real; k++) {
+      for (j = 0; j < H.ny_real + 1; j++) {
+        for (i = 0; i < H.nx_real; i++) {
+          id               = (i + H.n_ghost) + (j + H.n_ghost - 1) * H.nx + (k + H.n_ghost) * H.nx * H.ny;
+          buf_id           = k + j * (H.nz_real) + i * (H.nz_real) * (H.ny_real + 1);
+          C.magnetic_y[id] = dataset_buffer[buf_id];
+
+          mean_l += std::abs(C.magnetic_x[id]);
+          max_l = std::max(max_l, std::abs(C.magnetic_x[id]));
+          min_l = std::min(min_l, std::abs(C.magnetic_x[id]));
         }
       }
     }
-    mean_l /= ( H.nz_real * H.ny_real * H.nx_real );
+    mean_l /= ((H.nz_real) * (H.ny_real + 1) * (H.nx_real));
 
     #if MPI_CHOLLA
-    mean_g = ReduceRealAvg( mean_l );
-    max_g = ReduceRealMax( max_l );
-    min_g = ReduceRealMin( min_l );
+    mean_g = ReduceRealAvg(mean_l);
+    max_g  = ReduceRealMax(max_l);
+    min_g  = ReduceRealMin(min_l);
     mean_l = mean_g;
-    max_l = max_g;
-    min_l = min_g;
-    #endif  //MPI_CHOLLA
+    max_l  = max_g;
+    min_l  = min_g;
+    #endif  // MPI_CHOLLA
 
     #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY)
-    chprintf( " Energy  Mean: %f   Min: %f   Max: %f      [ h^2 Msun kpc^-3 km^2 s^-2 ] \n", mean_l, min_l, max_l );
-    #endif  //PRINT_INITIAL_STATS and COSMOLOGY
-
-
-    #ifdef DE
-    // Open the internal Energy dataset
-    dataset_id = H5Dopen(file_id, "/GasEnergy", H5P_DEFAULT);
-    // Read the internal Energy array into the dataset buffer  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
+    chprintf(
+        " abs(Magnetic Y)  Mean: %f   Min: %f   Max: %f      [ Msun^1/2 "
+        "kpc^-1/2 s^-1] \n",
+        mean_l, min_l, max_l);
+    #endif  // PRINT_INITIAL_STATS and COSMOLOGY
+
+    // Open the z magnetic field dataset
+    dataset_id = H5Dopen(file_id, "/magnetic_z", H5P_DEFAULT);
+    // Read the z magnetic field array into the dataset buffer  // NOTE: NEED TO
+    // FIX FOR FLOAT REAL!!!
     status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
     // Free the dataset id
     status = H5Dclose(dataset_id);
 
-    Real temp, temp_max_l, temp_min_l, temp_mean_l;
-    Real temp_min_g, temp_max_g, temp_mean_g;
-    temp_mean_l = 0;
-    temp_min_l = 1e65;
-    temp_max_l = -1;
     mean_l = 0;
-    min_l = 1e65;
-    max_l = -1;
-    // Copy the internal Energy array to the grid
-    for (k=0; k<H.nz_real; k++) {
-      for (j=0; j<H.ny_real; j++) {
-        for (i=0; i<H.nx_real; i++) {
-          id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-          buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-          C.GasEnergy[id] = dataset_buffer[buf_id];
-          mean_l += C.GasEnergy[id];
-          if ( C.GasEnergy[id] > max_l ) max_l = C.GasEnergy[id];
-          if ( C.GasEnergy[id] < min_l ) min_l = C.GasEnergy[id];
-          temp = C.GasEnergy[id] / C.density[id] * ( gama - 1 ) * MP / KB * 1e10 ;
-          temp_mean_l += temp;
-          // chprintf( "%f\n", temp);
-          if ( temp > temp_max_l ) temp_max_l = temp;
-          if ( temp < temp_min_l ) temp_min_l = temp;
+    min_l  = 1e65;
+    max_l  = -1;
+    // Copy the z magnetic field array to the grid
+    for (k = 0; k < H.nz_real + 1; k++) {
+      for (j = 0; j < H.ny_real; j++) {
+        for (i = 0; i < H.nx_real; i++) {
+          id               = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost - 1) * H.nx * H.ny;
+          buf_id           = k + j * (H.nz_real + 1) + i * (H.nz_real + 1) * (H.ny_real);
+          C.magnetic_z[id] = dataset_buffer[buf_id];
+
+          mean_l += std::abs(C.magnetic_x[id]);
+          max_l = std::max(max_l, std::abs(C.magnetic_x[id]));
+          min_l = std::min(min_l, std::abs(C.magnetic_x[id]));
         }
       }
     }
-    mean_l /= ( H.nz_real * H.ny_real * H.nx_real );
-    temp_mean_l /= ( H.nz_real * H.ny_real * H.nx_real );
+    mean_l /= ((H.nz_real) * (H.ny_real) * (H.nx_real + 1));
 
     #if MPI_CHOLLA
-    mean_g = ReduceRealAvg( mean_l );
-    max_g = ReduceRealMax( max_l );
-    min_g = ReduceRealMin( min_l );
+    mean_g = ReduceRealAvg(mean_l);
+    max_g  = ReduceRealMax(max_l);
+    min_g  = ReduceRealMin(min_l);
     mean_l = mean_g;
-    max_l = max_g;
-    min_l = min_g;
-    temp_mean_g = ReduceRealAvg( temp_mean_l );
-    temp_max_g = ReduceRealMax( temp_max_l );
-    temp_min_g = ReduceRealMin( temp_min_l );
-    temp_mean_l = temp_mean_g;
-    temp_max_l = temp_max_g;
-    temp_min_l = temp_min_g;
-    #endif  //MPI_CHOLLA
+    max_l  = max_g;
+    min_l  = min_g;
+    #endif  // MPI_CHOLLA
 
     #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY)
-    chprintf( " GasEnergy  Mean: %f   Min: %f   Max: %f      [ h^2 Msun kpc^-3 km^2 s^-2 ] \n", mean_l, min_l, max_l );
-    chprintf( " Temperature  Mean: %f   Min: %f   Max: %f      [ K ] \n", temp_mean_l, temp_min_l, temp_max_l );
-    #endif  //PRINT_INITIAL_STATS and COSMOLOGY
-
-    #endif//DE
-
-    #ifdef SCALAR
-    #if !defined(COOLING_GRACKLE) && !defined(CHEMISTRY_GPU)  // Dont Load scalars when using grackle or CHEMISTRY_GPU
-    for (int s=0; s<NSCALARS; s++) {
-      // create the name of the dataset
-      char dataset[100];
-      char number[10];
-      strcpy(dataset, "/scalar");
-      sprintf(number, "%d", s);
-      strcat(dataset,number);
-
-      // Open the scalar dataset
-      dataset_id = H5Dopen(file_id, dataset, H5P_DEFAULT);
-      // Read the scalar array into the dataset buffer  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-      status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      // Free the dataset id
-      status = H5Dclose(dataset_id);
-
-      // Copy the scalar array to the grid
-      for (k=0; k<H.nz_real; k++) {
-        for (j=0; j<H.ny_real; j++) {
-          for (i=0; i<H.nx_real; i++) {
-            id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-            buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-            C.scalar[id+s*H.n_cells] = dataset_buffer[buf_id];
-          }
-        }
-      }
-    }
-    #else //Load Chemistry when using GRACKLE or CHEMISTRY_GPU
-    if (P.nfile == 0){
-      Real dens;
-      Real HI_frac = INITIAL_FRACTION_HI;
-      Real HII_frac = INITIAL_FRACTION_HII;
-      Real HeI_frac = INITIAL_FRACTION_HEI;
-      Real HeII_frac = INITIAL_FRACTION_HEII;
-      Real HeIII_frac = INITIAL_FRACTION_HEIII;
-      Real e_frac = INITIAL_FRACTION_ELECTRON;
-      Real metal_frac = INITIAL_FRACTION_METAL;
-      chprintf( " Initial HI Fraction:    %e \n", HI_frac);
-      chprintf( " Initial HII Fraction:   %e \n", HII_frac);
-      chprintf( " Initial HeI Fraction:   %e \n", HeI_frac);
-      chprintf( " Initial HeII Fraction:  %e \n", HeII_frac);
-      chprintf( " Initial HeIII Fraction: %e \n", HeIII_frac);
-      chprintf( " Initial elect Fraction: %e \n", e_frac);
-      #ifdef GRACKLE_METALS
-      chprintf( " Initial metal Fraction: %e \n", metal_frac);
-      #endif  //GRACKEL_METALS
-      for (k=0; k<H.nz_real; k++) {
-        for (j=0; j<H.ny_real; j++) {
-          for (i=0; i<H.nx_real; i++) {
-            id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-            dens = C.density[id];
-            C.scalar[0*H.n_cells + id] = HI_frac * dens;
-            C.scalar[1*H.n_cells + id] = HII_frac * dens;
-            C.scalar[2*H.n_cells + id] = HeI_frac * dens;
-            C.scalar[3*H.n_cells + id] = HeII_frac * dens;
-            C.scalar[4*H.n_cells + id] = HeIII_frac * dens;
-            C.scalar[5*H.n_cells + id] = e_frac * dens;
-            #ifdef GRACKLE_METALS
-            C.scalar[6*H.n_cells + id] = metal_frac * dens;
-            #endif
-          }
-        }
-      }
-    }
-    else{
-      dataset_id = H5Dopen(file_id, "/HI_density", H5P_DEFAULT);
-      status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      status = H5Dclose(dataset_id);
-      for (k=0; k<H.nz_real; k++) {
-        for (j=0; j<H.ny_real; j++) {
-          for (i=0; i<H.nx_real; i++) {
-            id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-            buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-            C.scalar[0*H.n_cells + id] = dataset_buffer[buf_id];
-            // chprintf("%f \n",  C.scalar[0*H.n_cells + id] / C.density[id]);
-          }
-        }
-      }
-      dataset_id = H5Dopen(file_id, "/HII_density", H5P_DEFAULT);
-      status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      status = H5Dclose(dataset_id);
-      for (k=0; k<H.nz_real; k++) {
-        for (j=0; j<H.ny_real; j++) {
-          for (i=0; i<H.nx_real; i++) {
-            id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-            buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-            C.scalar[1*H.n_cells + id] = dataset_buffer[buf_id];
-          }
-        }
-      }
-      dataset_id = H5Dopen(file_id, "/HeI_density", H5P_DEFAULT);
-      status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      status = H5Dclose(dataset_id);
-      for (k=0; k<H.nz_real; k++) {
-        for (j=0; j<H.ny_real; j++) {
-          for (i=0; i<H.nx_real; i++) {
-            id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-            buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-            C.scalar[2*H.n_cells + id] = dataset_buffer[buf_id];
-          }
-        }
-      }
-      dataset_id = H5Dopen(file_id, "/HeII_density", H5P_DEFAULT);
-      status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      status = H5Dclose(dataset_id);
-      for (k=0; k<H.nz_real; k++) {
-        for (j=0; j<H.ny_real; j++) {
-          for (i=0; i<H.nx_real; i++) {
-            id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-            buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-            C.scalar[3*H.n_cells + id] = dataset_buffer[buf_id];
-          }
-        }
-      }
-      dataset_id = H5Dopen(file_id, "/HeIII_density", H5P_DEFAULT);
-      status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      status = H5Dclose(dataset_id);
-      for (k=0; k<H.nz_real; k++) {
-        for (j=0; j<H.ny_real; j++) {
-          for (i=0; i<H.nx_real; i++) {
-            id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-            buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-            C.scalar[4*H.n_cells + id] = dataset_buffer[buf_id];
-          }
-        }
-      }
-      dataset_id = H5Dopen(file_id, "/e_density", H5P_DEFAULT);
-      status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      status = H5Dclose(dataset_id);
-      for (k=0; k<H.nz_real; k++) {
-        for (j=0; j<H.ny_real; j++) {
-          for (i=0; i<H.nx_real; i++) {
-            id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-            buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-            C.scalar[5*H.n_cells + id] = dataset_buffer[buf_id];
-          }
-        }
-      }
-      #ifdef GRACKLE_METALS
-      dataset_id = H5Dopen(file_id, "/metal_density", H5P_DEFAULT);
-      status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      status = H5Dclose(dataset_id);
-      for (k=0; k<H.nz_real; k++) {
-        for (j=0; j<H.ny_real; j++) {
-          for (i=0; i<H.nx_real; i++) {
-            id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny;
-            buf_id = k + j*H.nz_real + i*H.nz_real*H.ny_real;
-            C.scalar[6*H.n_cells + id] = dataset_buffer[buf_id];
-          }
-        }
-      }
-      #endif  //GRACKLE_METALS
-    }
-    #endif//COOLING_GRACKLE
-    #endif//SCALAR
-
-    #ifdef  MHD
-      // Start by creating a dataspace and buffer that is large enough for the
-      // magnetic field since it's one larger than the rest
-      free(dataset_buffer);
-      dataset_buffer = (Real *) malloc((H.nz_real+1)*(H.ny_real+1)*(H.nx_real+1)*sizeof(Real));
-
-
-      // Open the x magnetic field dataset
-      dataset_id = H5Dopen(file_id, "/magnetic_x", H5P_DEFAULT);
-      // Read the x magnetic field array into the dataset buffer  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-      status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      // Free the dataset id
-      status = H5Dclose(dataset_id);
-
-      mean_l = 0;
-      min_l = 1e65;
-      max_l = -1;
-      // Copy the x magnetic field array to the grid
-      for (k=0; k<H.nz_real+1; k++) {
-        for (j=0; j<H.ny_real+1; j++) {
-          for (i=0; i<H.nx_real+1; i++) {
-            id = (i+H.n_ghost-1) + (j+H.n_ghost-1)*H.nx + (k+H.n_ghost-1)*H.nx*H.ny;
-            buf_id = k + j*(H.nz_real+1) + i*(H.nz_real+1)*(H.ny_real+1);
-            C.magnetic_x[id] = dataset_buffer[buf_id];
-            mean_l += fabs(C.magnetic_x[id]);
-            if ( fabs(C.magnetic_x[id]) > max_l ) max_l = fabs(C.magnetic_x[id]);
-            if ( fabs(C.magnetic_x[id]) < min_l ) min_l = fabs(C.magnetic_x[id]);
-          }
-        }
-      }
-      mean_l /= ( (H.nz_real+1) * (H.ny_real+1) * (H.nx_real+1) );
-
-      #if MPI_CHOLLA
-        mean_g = ReduceRealAvg( mean_l );
-        max_g = ReduceRealMax( max_l );
-        min_g = ReduceRealMin( min_l );
-        mean_l = mean_g;
-        max_l = max_g;
-        min_l = min_g;
-      #endif  //MPI_CHOLLA
-
-      #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY)
-        chprintf( " abs(Magnetic X)  Mean: %f   Min: %f   Max: %f      [ Msun^1/2 kpc^-1/2 s^-1] \n", mean_l, min_l, max_l );
-      #endif  //PRINT_INITIAL_STATS and COSMOLOGY
-
-      // Open the y magnetic field dataset
-      dataset_id = H5Dopen(file_id, "/magnetic_y", H5P_DEFAULT);
-      // Read the y magnetic field array into the dataset buffer  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-      status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      // Free the dataset id
-      status = H5Dclose(dataset_id);
-
-      mean_l = 0;
-      min_l = 1e65;
-      max_l = -1;
-      // Copy the y magnetic field array to the grid
-      for (k=0; k<H.nz_real+1; k++) {
-        for (j=0; j<H.ny_real+1; j++) {
-          for (i=0; i<H.nx_real+1; i++) {
-            id = (i+H.n_ghost-1) + (j+H.n_ghost-1)*H.nx + (k+H.n_ghost-1)*H.nx*H.ny;
-            buf_id = k + j*(H.nz_real+1) + i*(H.nz_real+1)*(H.ny_real+1);
-            C.magnetic_y[id] = dataset_buffer[buf_id];
-            mean_l += fabs(C.magnetic_y[id]);
-            if ( fabs(C.magnetic_y[id]) > max_l ) max_l = fabs(C.magnetic_y[id]);
-            if ( fabs(C.magnetic_y[id]) < min_l ) min_l = fabs(C.magnetic_y[id]);
-          }
-        }
-      }
-      mean_l /= ( (H.nz_real+1) * (H.ny_real+1) * (H.nx_real+1) );
-
-      #if MPI_CHOLLA
-        mean_g = ReduceRealAvg( mean_l );
-        max_g = ReduceRealMax( max_l );
-        min_g = ReduceRealMin( min_l );
-        mean_l = mean_g;
-        max_l = max_g;
-        min_l = min_g;
-      #endif  //MPI_CHOLLA
-
-      #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY)
-        chprintf( " abs(Magnetic Y)  Mean: %f   Min: %f   Max: %f      [ Msun^1/2 kpc^-1/2 s^-1] \n", mean_l, min_l, max_l );
-      #endif  //PRINT_INITIAL_STATS and COSMOLOGY
-
-      // Open the z magnetic field dataset
-      dataset_id = H5Dopen(file_id, "/magnetic_z", H5P_DEFAULT);
-      // Read the z magnetic field array into the dataset buffer  // NOTE: NEED TO FIX FOR FLOAT REAL!!!
-      status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-      // Free the dataset id
-      status = H5Dclose(dataset_id);
-
-      mean_l = 0;
-      min_l = 1e65;
-      max_l = -1;
-      // Copy the z magnetic field array to the grid
-      for (k=0; k<H.nz_real+1; k++) {
-        for (j=0; j<H.ny_real+1; j++) {
-          for (i=0; i<H.nx_real+1; i++) {
-            id = (i+H.n_ghost-1) + (j+H.n_ghost-1)*H.nx + (k+H.n_ghost-1)*H.nx*H.ny;
-            buf_id = k + j*(H.nz_real+1) + i*(H.nz_real+1)*(H.ny_real+1);
-            C.magnetic_z[id] = dataset_buffer[buf_id];
-            mean_l += fabs(C.magnetic_z[id]);
-            if ( fabs(C.magnetic_z[id]) > max_l ) max_l = fabs(C.magnetic_z[id]);
-            if ( fabs(C.magnetic_z[id]) < min_l ) min_l = fabs(C.magnetic_z[id]);
-          }
-        }
-      }
-      mean_l /= ( (H.nz_real+1) * (H.ny_real+1) * (H.nx_real+1) );
-
-      #if MPI_CHOLLA
-        mean_g = ReduceRealAvg( mean_l );
-        max_g = ReduceRealMax( max_l );
-        min_g = ReduceRealMin( min_l );
-        mean_l = mean_g;
-        max_l = max_g;
-        min_l = min_g;
-      #endif  //MPI_CHOLLA
-
-      #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY)
-        chprintf( " abs(Magnetic Z)  Mean: %f   Min: %f   Max: %f      [ Msun^1/2 kpc^-1/2 s^-1] \n", mean_l, min_l, max_l );
-      #endif  //PRINT_INITIAL_STATS and COSMOLOGY
-    #endif  //MHD
+    chprintf(
+        " abs(Magnetic Z)  Mean: %f   Min: %f   Max: %f      [ Msun^1/2 "
+        "kpc^-1/2 s^-1] \n",
+        mean_l, min_l, max_l);
+    #endif  // PRINT_INITIAL_STATS and COSMOLOGY
+  #endif    // MHD
   }
   free(dataset_buffer);
 }
 #endif
 
-
-
 /* MPI-safe printf routine */
-int chprintf(const char * __restrict sdata, ...)
+int chprintf(const char *__restrict sdata, ...)  // NOLINT(cert-dcl50-cpp)
 {
   int code = 0;
-#ifdef MPI_CHOLLA
   /*limit printf to root process only*/
-  if(procID==root)
-  {
-#endif /*MPI_CHOLLA*/
-
-  va_list ap;
-  va_start(ap, sdata);
-  code = vfprintf(stdout, sdata, ap);
-  va_end(ap);
-  fflush(stdout);
-
-#ifdef MPI_CHOLLA
+  if (Is_Root_Proc()) {
+    va_list ap;
+    va_start(ap, sdata);
+    code = vfprintf(stdout, sdata, ap);  // NOLINT(clang-analyzer-valist.Uninitialized)
+    va_end(ap);
+    fflush(stdout);
   }
-#endif /*MPI_CHOLLA*/
 
   return code;
 }
 
-
-void rotate_point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real *xp, Real *yp, Real *zp) {
-
-  Real cd,sd,cp,sp,ct,st; //sines and cosines
-  Real a00, a01, a02;     //rotation matrix elements
+void Rotate_Point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real *xp, Real *yp, Real *zp)
+{
+  Real cd, sd, cp, sp, ct, st;  // sines and cosines
+  Real a00, a01, a02;           // rotation matrix elements
   Real a10, a11, a12;
   Real a20, a21, a22;
 
-  //compute trig functions of rotation angles
+  // compute trig functions of rotation angles
   cd = cos(delta);
   sd = sin(delta);
   cp = cos(phi);
@@ -3327,7 +2734,7 @@ void rotate_point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real
   ct = cos(theta);
   st = sin(theta);
 
-  //compute the rotation matrix elements
+  // compute the rotation matrix elements
   /*a00 =       cosp*cosd - sinp*cost*sind;
   a01 = -1.0*(cosp*sind + sinp*cost*cosd);
   a02 =       sinp*sint;
@@ -3339,32 +2746,119 @@ void rotate_point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real
   a20 =       sint*sind;
   a21 =       sint*cosd;
   a22 =       cost;*/
-  a00 = (cp*cd - sp*ct*sd);
-  a01 = -1.0*(cp*sd+sp*ct*cd);
-  a02 = sp*st;
-  a10 = (sp*cd + cp*ct*sd);
-  a11 = (cp*ct*cd -st*sd);
-  a12 = cp*st;
-  a20 = st*sd;
-  a21 = st*cd;
+  a00 = (cp * cd - sp * ct * sd);
+  a01 = -1.0 * (cp * sd + sp * ct * cd);
+  a02 = sp * st;
+  a10 = (sp * cd + cp * ct * sd);
+  a11 = (cp * ct * cd - st * sd);
+  a12 = cp * st;
+  a20 = st * sd;
+  a21 = st * cd;
   a22 = ct;
 
-  *xp = a00*x + a01*y + a02*z;
-  *yp = a10*x + a11*y + a12*z;
-  *zp = a20*x + a21*y + a22*z;
-
+  *xp = a00 * x + a01 * y + a02 * z;
+  *yp = a10 * x + a11 * y + a12 * z;
+  *zp = a20 * x + a21 * y + a22 * z;
 }
 
-void write_debug ( Real *Value, const char *fname, int nValues, int iProc )
-  {
+void Write_Debug(Real *Value, const char *fname, int nValues, int iProc)
+{
   char fn[1024];
   int ret;
 
   sprintf(fn, "%s_%07d.txt", fname, iProc);
   FILE *fp = fopen(fn, "w");
 
-  for ( int iV = 0; iV < nValues; iV++ )
+  for (int iV = 0; iV < nValues; iV++) {
     fprintf(fp, "%e\n", Value[iV]);
+  }
+
+  fclose(fp);
+}
 
-  fclose (fp);
+std::string FnameTemplate::effective_output_dir_path(int nfile) const noexcept
+{
+  // for consistency, ensure that the returned string always has a trailing "/"
+  if (outdir_.empty()) {
+    return "./";
+  } else if (separate_cycle_dirs_) {
+    return this->outdir_ + "/" + std::to_string(nfile) + "/";
+  } else {
+    // if the last character of outdir is not a '/', then the substring of
+    // characters after the final '/' (or entire string if there isn't any '/')
+    // is treated as a file-prefix
+    //
+    // this is accomplished here:
+    std::filesystem::path without_file_prefix = std::filesystem::path(this->outdir_).parent_path();
+    return without_file_prefix.string() + "/";
   }
+}
+
+std::string FnameTemplate::format_fname(int nfile, const std::string &pre_extension_suffix) const noexcept
+{
+#ifdef MPI_CHOLLA
+  int file_proc_id = procID;
+#else
+  int file_proc_id = 0;
+#endif
+  return format_fname(nfile, file_proc_id, pre_extension_suffix);
+}
+
+std::string FnameTemplate::format_fname(int nfile, int file_proc_id,
+                                        const std::string &pre_extension_suffix) const noexcept
+{
+  // get the leading section of the string
+  const std::string path_prefix =
+      (separate_cycle_dirs_)
+          ? (effective_output_dir_path(nfile) + "/")  // while redundant, the slash signals our intent
+          : outdir_;
+
+  // get the file extension
+#if defined BINARY
+  const char *extension = ".bin";
+#elif defined HDF5
+  const char *extension = ".h5";
+#else
+  const char *extension = ".txt";
+#endif
+
+  std::string procID_part = "." + std::to_string(file_proc_id);  // initialized to empty string
+
+  return path_prefix + std::to_string(nfile) + pre_extension_suffix + extension + procID_part;
+}
+
+void Ensure_Dir_Exists(std::string dir_path)
+{
+  if (Is_Root_Proc()) {
+    // if the last character of outdir is not a '/', then the substring of
+    // characters after the final '/' (or entire string if there isn't any '/')
+    // is treated as a file-prefix
+    //
+    // this is accomplished here:
+    std::filesystem::path path = std::filesystem::path(dir_path);
+
+    if (!dir_path.empty()) {
+      // try to create all directories specified within outdir (does nothing if
+      // the directories already exist)
+      std::error_code err_code;
+      std::filesystem::create_directories(path, err_code);
+
+      // confirm that an error-code wasn't set & that the path actually refers
+      // to a directory (it's unclear from docs whether err-code is set in that
+      // case)
+      if (err_code or not std::filesystem::is_directory(path)) {
+        CHOLLA_ERROR(
+            "something went wrong while trying to create the path to the "
+            "directory: %s",
+            dir_path.c_str());
+      }
+    }
+  }
+
+  // this barrier ensures we won't ever encounter a scenario when 1 process
+  // tries to write a file to a non-existent directory before the root process
+  // has a chance to create it
+#ifdef MPI_CHOLLA
+  MPI_Barrier(world);
+#endif
+}
diff --git a/src/io/io.h b/src/io/io.h
index f7dfe6eb7..d8f6ca8ca 100644
--- a/src/io/io.h
+++ b/src/io/io.h
@@ -1,44 +1,138 @@
 #pragma once
 
-#include "../global/global.h"
-#include "../grid/grid3D.h"
+#include <iomanip>
 #include <iostream>
+#include <sstream>
 
+#include "../global/global.h"
+#include "../grid/grid3D.h"
 
 /* Write the data */
-void WriteData(Grid3D &G, struct parameters P, int nfile);
+void Write_Data(Grid3D& G, struct Parameters P, int nfile);
 
 /* Output the grid data to file. */
-void OutputData(Grid3D &G, struct parameters P, int nfile);
+void Output_Data(Grid3D& G, struct Parameters P, int nfile);
 
 /* Output the grid data to file as 32-bit floats. */
-void OutputFloat32(Grid3D &G, struct parameters P, int nfile);
+void Output_Float32(Grid3D& G, struct Parameters P, int nfile);
 
 /* Output a projection of the grid data to file. */
-void OutputProjectedData(Grid3D &G, struct parameters P, int nfile);
+void Output_Projected_Data(Grid3D& G, struct Parameters P, int nfile);
 
 /* Output a rotated projection of the grid data to file. */
-void OutputRotatedProjectedData(Grid3D &G, struct parameters P, int nfile);
+void Output_Rotated_Projected_Data(Grid3D& G, struct Parameters P, int nfile);
 
 /* Output xy, xz, and yz slices of the grid data to file. */
-void OutputSlices(Grid3D &G, struct parameters P, int nfile);
+void Output_Slices(Grid3D& G, struct Parameters P, int nfile);
 
 /* MPI-safe printf routine */
-int chprintf(const char * __restrict sdata, ...);
+int chprintf(const char* __restrict sdata, ...);
+
+/*!
+ * \brief Convert a floating point number to a string such that it can be
+ * exactly deserialized back from a string to the same floating point number.
+ *
+ * \tparam T Any floating point type
+ * \param[in] input The floating point number to convert
+ * \return std::string The string representation of the input floating point
+ */
+template <typename T>
+std::string to_string_exact(T const& input)
+{
+  std::stringstream output;
+  output << std::setprecision(std::numeric_limits<T>::max_digits10);
+  output << input;
+  return output.str();
+}
+
+void Create_Log_File(struct Parameters P);
+
+void Write_Message_To_Log_File(const char* message);
+
+void Write_Debug(Real* Value, const char* fname, int nValues, int iProc);
+
+/* Lightweight object designed to centralize the file-naming logic (& any associated configuration).
+ *
+ * Cholla pathnames traditionally followed the following template:
+ *     "{outdir}{nfile}{pre_extension_suffix}{extension}.{proc_id}"
+ * where each curly-braced token represents a different variable. In detail:
+ *   - `{outdir}` is the parameter from the parameter file. The historical behavior (that we currently
+ *     maintain), if this is non-empty, then all charaters following the last '/' are treated as a
+ *     prefix to the output file name (if there aren't any '/' characters, then the whole string is
+ *     effectively a prefix.
+ *   - `{nfile}` is the current file-output count.
+ *   - `{pre_extension_suffix}` is the pre-hdf5-extension suffix. It's the suffix that precedes the
+ *     file extension (or `{extension}`)
+ *   - `{extension}` is the filename extension. Examples include ".h5" or ".bin" or ".txt".
+ *   - `{proc_id}` represents the process-id that held the data that will be written to this file.
+ *     Previously, in non-MPI runs, this was omitted.
+ *
+ * Instances can be configured to support the following newer file-naming template
+ *    "{outdir}/{nfile}/{nfile}{pre_extension_suffix}{extension}.{proc_id}"
+ * where the the significance of each curly-braced token is largely unchanged. There are 2 things
+ * worth noting:
+ *   - all files written at a single simulation-cycle are now grouped in a single directory
+ *   - `{outdir}` never specifies a file prefix. When `{outdir}` is empty, it is treated as "./".
+ *     Otherwise, we effectively append '/' to the end of `{outdir}`
+ *
+ * \note
+ * This could probably pull double-duty and get reused with infile.
+ */
+class FnameTemplate
+{
+ public:
+  FnameTemplate() = delete;
 
-void Create_Log_File( struct parameters P );
+  FnameTemplate(bool separate_cycle_dirs, std::string outdir)
+      : separate_cycle_dirs_(separate_cycle_dirs), outdir_(std::move(outdir))
+  {
+  }
 
-void Write_Message_To_Log_File( const char* message );
+  FnameTemplate(const Parameters& P) : FnameTemplate(not P.legacy_flat_outdir, P.outdir) {}
 
-void write_debug ( Real *Value, const char *fname, int nValues, int iProc );
+  /* Specifies whether separate cycles are written to separate directories */
+  bool separate_cycle_dirs() const noexcept { return separate_cycle_dirs_; }
+
+  /* Returns the effective output-directory used for outputs at a given simulation-cycle */
+  std::string effective_output_dir_path(int nfile) const noexcept;
+
+  /* format the file path */
+  std::string format_fname(int nfile, const std::string& pre_extension_suffix) const noexcept;
+
+  std::string format_fname(int nfile, int file_proc_id, const std::string& pre_extension_suffix) const noexcept;
+
+ private:
+  bool separate_cycle_dirs_;
+  std::string outdir_;
+};
+
+/* Checks whether the directories referred to within outdir exist. Creates them
+ * if they don't. It gracefully handles cases where outdir contains a prefix
+ * for the output files.
+ */
+void Ensure_Dir_Exists(std::string dir_path);
 
 #ifdef HDF5
 // From io/io.cpp
-herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double* dataset_buffer, const char* name);
-herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, float* dataset_buffer, const char* name);
+
+herr_t Write_HDF5_Attribute(hid_t file_id, hid_t dataspace_id, double* attribute, const char* name);
+herr_t Write_HDF5_Attribute(hid_t file_id, hid_t dataspace_id, int* attribute, const char* name);
+
+herr_t Read_HDF5_Dataset(hid_t file_id, double* dataset_buffer, const char* name);
+herr_t Read_HDF5_Dataset(hid_t file_id, float* dataset_buffer, const char* name);
+
+herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double* dataset_buffer, const char* name);
+herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, float* dataset_buffer, const char* name);
+
+/* \brief After HDF5 reads data into a buffer, remap and write to grid buffer. */
+void Fill_Grid_From_HDF5_Buffer(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost,
+                                Real* hdf5_buffer, Real* grid_buffer);
 
 // From io/io_gpu.cu
-// Use GPU to pack source -> device_buffer, then copy device_buffer -> buffer, then write HDF5 field 
-void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, float* device_buffer, Real* source, const char* name);
-void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, double* device_buffer, Real* source, const char* name);  
+// Use GPU to pack source -> device_buffer, then copy device_buffer -> buffer,
+// then write HDF5 field
+void Write_HDF5_Field_3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id,
+                         float* buffer, float* device_buffer, Real* source, const char* name, int mhd_direction = -1);
+void Write_HDF5_Field_3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id,
+                         double* buffer, double* device_buffer, Real* source, const char* name, int mhd_direction = -1);
 #endif
diff --git a/src/io/io_gpu.cu b/src/io/io_gpu.cu
index c6cab6e8a..a793ab792 100644
--- a/src/io/io_gpu.cu
+++ b/src/io/io_gpu.cu
@@ -1,110 +1,182 @@
 // Require HDF5
 #ifdef HDF5
 
-#include <hdf5.h>
+  #include <hdf5.h>
 
-#include "../grid/grid3D.h"
+  #include "../grid/grid3D.h"
+  #include "../io/io.h"
+  #include "../utils/cuda_utilities.h"
 
-#include "../io/io.h" // To provide io.h with OutputViz3D
+// Note that the HDF5 file and buffer will have size nx_real * ny_real * nz_real
+// whereas the conserved variables have size nx,ny,nz.
 
-// Note that the HDF5 file and buffer will have size nx_real * ny_real * nz_real whereas the conserved variables have size nx,ny,nz
-// Note that magnetic fields add +1 to nx_real ny_real nz_real since an extra face needs to be output, but also has the same size nx ny nz
-// For the magnetic field case, a different nx_real+1 ny_real+1 nz_real+1 n_ghost-1 are provided as inputs.
+// Note that magnetic fields
+// add +1 to nx_real ny_real nz_real since an extra face needs to be output, but
+// also has the same size nx ny nz.
 
-// Copy Real (non-ghost) cells from source to a double destination (for writing HDF5 in double precision)
-__global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, double* destination, Real* source)
+// For the magnetic field case, a different
+// nx_real+1 ny_real+1 nz_real+1 n_ghost-1 are provided as inputs.
+
+// 2D version of CopyReal3D_GPU_Kernel. Note that magnetic fields and float32 output are not enabled in 2-D so this is a
+// simpler kernel
+__global__ void CopyReal2D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost,
+                                      Real* destination, Real* source)
 {
+  int const id = threadIdx.x + blockIdx.x * blockDim.x;
+
+  int i, j, k;
+  cuda_utilities::compute3DIndices(id, nx_real, ny_real, i, j, k);
+  // i goes up to nx_real
+  // j goes up to ny_real
+  // for 2D, k should be 0
+  if (k >= 1) {
+    return;
+  }
 
-  int dest_id,source_id,id,i,j,k;
-  id = threadIdx.x + blockIdx.x * blockDim.x;
+  // This converts into HDF5 indexing that plays well with Python
+  int const dest_id   = j + i * ny_real;
+  int const source_id = (i + n_ghost) + (j + n_ghost) * nx;
 
-  k = id/(nx_real*ny_real);
-  j = (id - k*nx_real*ny_real)/nx_real;
-  i = id - j*nx_real - k*nx_real*ny_real;
+  destination[dest_id] = source[source_id];
+}
+
+// Copy Real (non-ghost) cells from source to a double destination (for writing
+// HDF5 in double precision)
+__global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost,
+                                      double* destination, Real* source, int mhd_direction)
+{
+  int const id = threadIdx.x + blockIdx.x * blockDim.x;
+
+  int i, j, k;
+  cuda_utilities::compute3DIndices(id, nx_real, ny_real, i, j, k);
 
   if (k >= nz_real) {
     return;
   }
 
   // This converts into HDF5 indexing that plays well with Python
-  dest_id = k + j*nz_real + i*ny_real*nz_real;
-  source_id = (i+n_ghost) + (j+n_ghost)*nx + (k+n_ghost)*nx*ny;
+  int const dest_id   = k + j * nz_real + i * ny_real * nz_real;
+  int const source_id = (i + n_ghost - int(mhd_direction == 0)) + (j + n_ghost - int(mhd_direction == 1)) * nx +
+                        (k + n_ghost - int(mhd_direction == 2)) * nx * ny;
 
-  destination[dest_id] = (double) source[source_id];
+  destination[dest_id] = (double)source[source_id];
 }
 
-// Copy Real (non-ghost) cells from source to a float destination (for writing HDF5 in float precision)
-__global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, float* destination, Real* source)
+// Copy Real (non-ghost) cells from source to a float destination (for writing
+// HDF5 in float precision)
+__global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost,
+                                      float* destination, Real* source, int mhd_direction)
 {
+  int const id = threadIdx.x + blockIdx.x * blockDim.x;
 
-  int dest_id,source_id,id,i,j,k;
-  id = threadIdx.x + blockIdx.x * blockDim.x;
-
-  k = id/(nx_real*ny_real);
-  j = (id - k*nx_real*ny_real)/nx_real;
-  i = id - j*nx_real - k*nx_real*ny_real;
+  int i, j, k;
+  cuda_utilities::compute3DIndices(id, nx_real, ny_real, i, j, k);
 
   if (k >= nz_real) {
     return;
   }
 
-  // This converts into HDF5 indexing that plays well with Python
-  dest_id = k + j*nz_real + i*ny_real*nz_real;
-  source_id = (i+n_ghost) + (j+n_ghost)*nx + (k+n_ghost)*nx*ny;
+  // This converts into HDF5 indexing that plays well with Python.
+  // The `int(mhd_direction == NUM)` sections provide appropriate shifts for writing out the magnetic fields since they
+  // need an extra cell in the same direction as the field
+  int const dest_id   = k + j * nz_real + i * ny_real * nz_real;
+  int const source_id = (i + n_ghost - int(mhd_direction == 0)) + (j + n_ghost - int(mhd_direction == 1)) * nx +
+                        (k + n_ghost - int(mhd_direction == 2)) * nx * ny;
 
-  destination[dest_id] = (float) source[source_id];
+  destination[dest_id] = (float)source[source_id];
 }
 
-// When buffer is double, automatically use the double version of everything using function overloading
-void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, double* device_buffer, Real* device_source, const char* name)
+// When buffer is double, automatically use the double version of everything
+// using function overloading
+void Write_HDF5_Field_3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id,
+                         double* buffer, double* device_buffer, Real* device_source, const char* name,
+                         int mhd_direction)
 {
   herr_t status;
   hsize_t dims[3];
-  dims[0] = nx_real;
-  dims[1] = ny_real;
-  dims[2] = nz_real;
+  dims[0]            = nx_real;
+  dims[1]            = ny_real;
+  dims[2]            = nz_real;
   hid_t dataspace_id = H5Screate_simple(3, dims, NULL);
 
-  //Copy non-ghost parts of source to buffer
-  dim3 dim1dGrid((nx_real*ny_real*nz_real+TPB-1)/TPB, 1, 1);
+  // Copy non-ghost parts of source to buffer
+  dim3 dim1dGrid((nx_real * ny_real * nz_real + TPB - 1) / TPB, 1, 1);
   dim3 dim1dBlock(TPB, 1, 1);
-  hipLaunchKernelGGL(CopyReal3D_GPU_Kernel,dim1dGrid,dim1dBlock,0,0,nx,ny,nx_real,ny_real,nz_real,n_ghost,device_buffer,device_source);
-  CudaSafeCall(cudaMemcpy( buffer, device_buffer, nx_real*ny_real*nz_real*sizeof(double), cudaMemcpyDeviceToHost));
+  hipLaunchKernelGGL(CopyReal3D_GPU_Kernel, dim1dGrid, dim1dBlock, 0, 0, nx, ny, nx_real, ny_real, nz_real, n_ghost,
+                     device_buffer, device_source, mhd_direction);
+  GPU_Error_Check(
+      cudaMemcpy(buffer, device_buffer, nx_real * ny_real * nz_real * sizeof(double), cudaMemcpyDeviceToHost));
 
   // Write Buffer to HDF5
-  status = HDF5_Dataset(file_id, dataspace_id, buffer, name);
+  status = Write_HDF5_Dataset(file_id, dataspace_id, buffer, name);
 
   status = H5Sclose(dataspace_id);
-  if (status < 0) {printf("File write failed.\n");}
-
-
+  if (status < 0) {
+    printf("File write failed.\n");
+  }
 }
 
-
-// When buffer is float, automatically use the float version of everything using function overloading
-void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, float* device_buffer, Real* device_source, const char* name)
+// When buffer is float, automatically use the float version of everything using
+// function overloading
+void Write_HDF5_Field_3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id,
+                         float* buffer, float* device_buffer, Real* device_source, const char* name, int mhd_direction)
 {
-
   herr_t status;
   hsize_t dims[3];
-  dims[0] = nx_real;
-  dims[1] = ny_real;
-  dims[2] = nz_real;
+  dims[0]            = nx_real;
+  dims[1]            = ny_real;
+  dims[2]            = nz_real;
   hid_t dataspace_id = H5Screate_simple(3, dims, NULL);
 
-  //Copy non-ghost parts of source to buffer
-  dim3 dim1dGrid((nx_real*ny_real*nz_real+TPB-1)/TPB, 1, 1);
+  // Copy non-ghost parts of source to buffer
+  dim3 dim1dGrid((nx_real * ny_real * nz_real + TPB - 1) / TPB, 1, 1);
   dim3 dim1dBlock(TPB, 1, 1);
-  hipLaunchKernelGGL(CopyReal3D_GPU_Kernel,dim1dGrid,dim1dBlock,0,0,nx,ny,nx_real,ny_real,nz_real,n_ghost,device_buffer,device_source);
-  CudaSafeCall(cudaMemcpy( buffer, device_buffer, nx_real*ny_real*nz_real*sizeof(float), cudaMemcpyDeviceToHost));
+  hipLaunchKernelGGL(CopyReal3D_GPU_Kernel, dim1dGrid, dim1dBlock, 0, 0, nx, ny, nx_real, ny_real, nz_real, n_ghost,
+                     device_buffer, device_source, mhd_direction);
+  GPU_Error_Check(
+      cudaMemcpy(buffer, device_buffer, nx_real * ny_real * nz_real * sizeof(float), cudaMemcpyDeviceToHost));
 
   // Write Buffer to HDF5
-  status = HDF5_Dataset(file_id, dataspace_id, buffer, name);
+  status = Write_HDF5_Dataset(file_id, dataspace_id, buffer, name);
 
   status = H5Sclose(dataspace_id);
-  if (status < 0) {printf("File write failed.\n");}
-
+  if (status < 0) {
+    printf("File write failed.\n");
+  }
 }
+void Fill_HDF5_Buffer_From_Grid_GPU(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost,
+                                    Real* hdf5_buffer, Real* device_hdf5_buffer, Real* device_grid_buffer)
+{
+  int mhd_direction = -1;
+
+  // 3D case
+  if (nx > 1 && ny > 1 && nz > 1) {
+    dim3 dim1dGrid((nx_real * ny_real * nz_real + TPB - 1) / TPB, 1, 1);
+    dim3 dim1dBlock(TPB, 1, 1);
+    hipLaunchKernelGGL(CopyReal3D_GPU_Kernel, dim1dGrid, dim1dBlock, 0, 0, nx, ny, nx_real, ny_real, nz_real, n_ghost,
+                       device_hdf5_buffer, device_grid_buffer, mhd_direction);
+    GPU_Error_Check(cudaMemcpy(hdf5_buffer, device_hdf5_buffer, nx_real * ny_real * nz_real * sizeof(Real),
+                               cudaMemcpyDeviceToHost));
+    return;
+  }
 
+  // 2D case
+  if (nx > 1 && ny > 1 && nz == 1) {
+    dim3 dim1dGrid((nx_real * ny_real + TPB - 1) / TPB, 1, 1);
+    dim3 dim1dBlock(TPB, 1, 1);
+    hipLaunchKernelGGL(CopyReal2D_GPU_Kernel, dim1dGrid, dim1dBlock, 0, 0, nx, ny, nx_real, ny_real, nz_real, n_ghost,
+                       device_hdf5_buffer, device_grid_buffer);
+    GPU_Error_Check(
+        cudaMemcpy(hdf5_buffer, device_hdf5_buffer, nx_real * ny_real * sizeof(Real), cudaMemcpyDeviceToHost));
+    return;
+  }
+
+  // 1D case
+  if (nx > 1 && ny == 1 && nz == 1) {
+    GPU_Error_Check(
+        cudaMemcpy(hdf5_buffer, device_grid_buffer + n_ghost, nx_real * sizeof(Real), cudaMemcpyDeviceToHost));
+    return;
+  }
+}
 
-#endif //HDF5
+#endif  // HDF5
diff --git a/src/io/io_parallel.cpp b/src/io/io_parallel.cpp
new file mode 100644
index 000000000..22257b1fc
--- /dev/null
+++ b/src/io/io_parallel.cpp
@@ -0,0 +1,141 @@
+// Routines for using Parallel HDF5 to read/write from single file
+#include "../grid/grid3D.h"
+#include "../io/io.h"
+#include "../utils/error_handling.h"
+
+#if defined(HDF5) && defined(MPI_CHOLLA)
+  #include <hdf5.h>
+
+  #include "../mpi/mpi_routines.h"
+  #include "../utils/timing_functions.h"  // provides ScopedTimer
+
+// Warning: H5Sselect_hyperslab expects its pointer args to be arrays of same size as the rank of the dataspace
+// file_space_id
+void Read_HDF5_Selection_3D(hid_t file_id, hsize_t* offset, hsize_t* count, double* buffer, const char* name)
+{
+  hid_t dataset_id = H5Dopen(file_id, name, H5P_DEFAULT);
+  // Select the requested subset of data
+  hid_t file_space_id = H5Dget_space(dataset_id);
+  hid_t mem_space_id  = H5Screate_simple(3, count, NULL);
+
+  // Notes on hyperslab call:
+
+  // First NULL is stride, setting to NULL is like setting to 1, contiguous
+
+  // Second NULL is block, setting to NULL sets block size to 1.
+
+  // Count is the number of blocks in each dimension:
+
+  // since our block size is 1, Count is the number of voxels in each dimension
+
+  herr_t status = H5Sselect_hyperslab(file_space_id, H5S_SELECT_SET, offset, NULL, count, NULL);
+  // Read in the data subset
+  status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, mem_space_id, file_space_id, H5P_DEFAULT, buffer);
+
+  // Free the ids
+  status = H5Sclose(mem_space_id);
+  status = H5Sclose(file_space_id);
+  status = H5Dclose(dataset_id);
+}
+
+// Alwin: I'm only writing a 3D version of this because that's what is practical.
+// Read from concatenated HDF5 file
+void Read_Grid_Cat_HDF5_Field(hid_t file_id, Real* dataset_buffer, Header H, hsize_t* offset, hsize_t* count,
+                              Real* grid_buffer, const char* name)
+{
+  Read_HDF5_Selection_3D(file_id, offset, count, dataset_buffer, name);
+  Fill_Grid_From_HDF5_Buffer(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, H.n_ghost, dataset_buffer, grid_buffer);
+}
+
+void Read_Grid_Cat_HDF5_Field_Magnetic(hid_t file_id, Real* dataset_buffer, Header H, hsize_t* offset, hsize_t* count,
+                                       Real* grid_buffer, const char* name)
+{
+  Read_HDF5_Selection_3D(file_id, offset, count, dataset_buffer, name);
+  Fill_Grid_From_HDF5_Buffer(H.nx, H.ny, H.nz, H.nx_real + 1, H.ny_real + 1, H.nz_real + 1, H.n_ghost - 1,
+                             dataset_buffer, grid_buffer);
+}
+
+/*! \brief Read in grid data from a single concatenated output file. */
+void Grid3D::Read_Grid_Cat(struct Parameters P)
+{
+  ScopedTimer timer("Read_Grid_Cat");
+  herr_t status;
+  char filename[100];
+
+  sprintf(filename, "%s%d.h5", P.indir, P.nfile);
+
+  hid_t file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT);
+
+  if (file_id < 0) {
+    printf("Unable to open input file: %s\n", filename);
+    exit(0);
+  }
+
+  // TODO (written by Alwin, for anyone to do) :
+  // Consider using collective calls if this part is slow at scale
+  hid_t attribute_id;
+  attribute_id = H5Aopen(file_id, "t", H5P_DEFAULT);
+  status       = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &H.t);
+  status       = H5Aclose(attribute_id);
+  attribute_id = H5Aopen(file_id, "n_step", H5P_DEFAULT);
+  status       = H5Aread(attribute_id, H5T_NATIVE_INT, &H.n_step);
+  status       = H5Aclose(attribute_id);
+
+  // Offsets are global variables from mpi_routines.h
+  hsize_t offset[3];
+  offset[0] = nx_local_start;
+  offset[1] = ny_local_start;
+  offset[2] = nz_local_start;
+
+  // This is really dims but I name it count because that's what HDF5 names it
+  hsize_t count[3];
+  count[0] = H.nx_real;
+  count[1] = H.ny_real;
+  count[2] = H.nz_real;
+
+  #ifdef MHD
+  Real* dataset_buffer = (Real*)malloc((H.nz_real + 1) * (H.ny_real + 1) * (H.nx_real + 1) * sizeof(Real));
+  #else
+  Real* dataset_buffer = (Real*)malloc((H.nz_real) * (H.ny_real) * (H.nx_real) * sizeof(Real));
+  #endif
+
+  Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.density, "/density");
+  Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.momentum_x, "/momentum_x");
+  Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.momentum_y, "/momentum_y");
+  Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.momentum_z, "/momentum_z");
+  Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.Energy, "/Energy");
+  #ifdef DE
+  Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.GasEnergy, "/GasEnergy");
+  #endif  // DE
+
+  #ifdef SCALAR
+    #ifdef BASIC_SCALAR
+  Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.basic_scalar, "/scalar0");
+    #endif
+    #ifdef DUST
+  Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.dust_density, "/dust_density");
+    #endif
+  #endif
+  // TODO (Alwin) : add scalar stuff
+
+  #ifdef MHD
+  Read_Grid_Cat_HDF5_Field_Magnetic(file_id, dataset_buffer, H, offset, count, C.magnetic_x, "/magnetic_x");
+  Read_Grid_Cat_HDF5_Field_Magnetic(file_id, dataset_buffer, H, offset, count, C.magnetic_y, "/magnetic_y");
+  Read_Grid_Cat_HDF5_Field_Magnetic(file_id, dataset_buffer, H, offset, count, C.magnetic_z, "/magnetic_z");
+  #endif
+
+  free(dataset_buffer);
+  status = H5Fclose(file_id);
+}
+
+#else
+
+void Grid3D::Read_Grid_Cat(struct Parameters P)
+{
+  chprintf("Warning: Read_Grid_Cat does nothing without MPI_CHOLLA and HDF5\n");
+  chexit(-1);
+  return;
+  // Does nothing without HDF5 and MPI_CHOLLA
+}
+
+#endif
diff --git a/src/io/io_tests.cpp b/src/io/io_tests.cpp
new file mode 100644
index 000000000..30b43f644
--- /dev/null
+++ b/src/io/io_tests.cpp
@@ -0,0 +1,45 @@
+/*!
+ * \file io_tests.cpp
+ * \author Robert 'Bob' Caddy (rvc@pitt.edu)
+ * \brief Contains all the system tests for code in io.h and io.cpp
+ *
+ */
+
+// External Libraries and Headers
+#include <gtest/gtest.h>
+
+// Local includes
+#include "../io/io.h"
+#include "../system_tests/system_tester.h"
+
+// STL includes
+#include <filesystem>
+#include <string>
+
+// =============================================================================
+TEST(tHYDROtMHDReadGridHdf5, RestartSlowWaveExpectCorrectOutput)
+{
+  // Set parameters
+  int const num_ranks           = 4;
+  std::string restart_nfile_str = "0";
+
+  // Generate the data to read from
+  system_test::SystemTestRunner initializer(false, true, false);
+  initializer.numMpiRanks = num_ranks;
+  initializer.chollaLaunchParams.append(" tout=0.0 outstep=0.0");
+  initializer.launchCholla();
+  std::string const read_directory = initializer.getOutputDirectory() + "/" + restart_nfile_str + "/";
+
+  // Reload data and run the test
+  system_test::SystemTestRunner loadRun(false, true, false);
+  loadRun.numMpiRanks = num_ranks;
+  loadRun.chollaLaunchParams.append(" init=Read_Grid nfile=" + restart_nfile_str + " indir=" + read_directory);
+
+#ifdef MHD
+  loadRun.setFiducialNumTimeSteps(854);
+#else   // not MHD
+  loadRun.setFiducialNumTimeSteps(427);
+#endif  // MHD
+  loadRun.runL1ErrorTest(4.2E-7, 5.4E-7);
+}
+// =============================================================================
\ No newline at end of file
diff --git a/src/main.cpp b/src/main.cpp
index 0df8bcfb4..758b9f54f 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -2,51 +2,69 @@
  *  \brief Program to run the grid code. */
 
 #ifdef MPI_CHOLLA
-#include <mpi.h>
-#include "mpi/mpi_routines.h"
+  #include <mpi.h>
+
+  #include "mpi/mpi_routines.h"
 #endif
+#include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <math.h>
 #include <string.h>
+
 #include "global/global.h"
 #include "grid/grid3D.h"
 #include "io/io.h"
+#include "utils/cuda_utilities.h"
 #include "utils/error_handling.h"
 
+#ifdef SUPERNOVA
+  #include "particles/supernova.h"
+  #ifdef ANALYSIS
+    #include "analysis/feedback_analysis.h"
+  #endif
+#endif  // SUPERNOVA
+#ifdef STAR_FORMATION
+  #include "particles/star_formation.h"
+#endif
+#ifdef MHD
+  #include "mhd/magnetic_divergence.h"
+#endif  // MHD
+
+#include "grid/grid_enum.h"
 
 int main(int argc, char *argv[])
 {
   // timing variables
   double start_total, stop_total, start_step, stop_step;
-  #ifdef CPU_TIME
+#ifdef CPU_TIME
   double stop_init, init_min, init_max, init_avg;
   double start_bound, stop_bound, bound_min, bound_max, bound_avg;
   double start_hydro, stop_hydro, hydro_min, hydro_max, hydro_avg;
   double init, bound, hydro;
   init = bound = hydro = 0;
-  #endif //CPU_TIME
+#endif  // CPU_TIME
 
   // start the total time
-  start_total = get_time();
+  start_total = Get_Time();
 
+#ifdef MPI_CHOLLA
   /* Initialize MPI communication */
-  #ifdef MPI_CHOLLA
   InitializeChollaMPI(&argc, &argv);
-  #endif /*MPI_CHOLLA*/
+#else
+  // Initialize subset of global parallelism variables usually managed by MPI
+  Init_Global_Parallel_Vars_No_MPI();
+#endif /*MPI_CHOLLA*/
 
-  Real dti = 0; // inverse time step, 1.0 / dt
+  Real dti = 0;  // inverse time step, 1.0 / dt
 
   // input parameter variables
   char *param_file;
-  struct parameters P;
-  int nfile = 0; // number of output files
-  Real outtime = 0; // current output time
-
+  struct Parameters P;
+  int nfile    = 0;  // number of output files
+  Real outtime = 0;  // current output time
 
   // read in command line arguments
-  if (argc < 2)
-  {
+  if (argc < 2) {
     chprintf("usage: %s <parameter_file>\n", argv[0]);
     chprintf("Git Commit Hash = %s\n", GIT_HASH);
     chprintf("Macro Flags     = %s\n", MACRO_FLAGS);
@@ -59,118 +77,155 @@ int main(int argc, char *argv[])
   Grid3D G;
 
   // read in the parameters
-  parse_params (param_file, &P, argc, argv);
+  Parse_Params(param_file, &P, argc, argv);
   // and output to screen
   chprintf("Git Commit Hash = %s\n", GIT_HASH);
   chprintf("Macro Flags     = %s\n", MACRO_FLAGS);
-  chprintf ("Parameter values:  nx = %d, ny = %d, nz = %d, tout = %f, init = %s, boundaries = %d %d %d %d %d %d\n",
-    P.nx, P.ny, P.nz, P.tout, P.init, P.xl_bcnd, P.xu_bcnd, P.yl_bcnd, P.yu_bcnd, P.zl_bcnd, P.zu_bcnd);
-  if (strcmp(P.init, "Read_Grid") == 0  ) chprintf ("Input directory:  %s\n", P.indir);
-  chprintf ("Output directory:  %s\n", P.outdir);
+  chprintf(
+      "Parameter values:  nx = %d, ny = %d, nz = %d, tout = %f, init = %s, "
+      "boundaries = %d %d %d %d %d %d\n",
+      P.nx, P.ny, P.nz, P.tout, P.init, P.xl_bcnd, P.xu_bcnd, P.yl_bcnd, P.yu_bcnd, P.zl_bcnd, P.zu_bcnd);
 
-  //Create a Log file to output run-time messages and output the git hash and
-  //macro flags used
+  bool is_restart = false;
+  if (strcmp(P.init, "Read_Grid") == 0) {
+    is_restart = true;
+  }
+  if (strcmp(P.init, "Read_Grid_Cat") == 0) {
+    is_restart = true;
+  }
+
+  if (is_restart) {
+    chprintf("Input directory:  %s\n", P.indir);
+  }
+  chprintf("Output directory:  %s\n", P.outdir);
+
+  // Check the configuration
+  Check_Configuration(P);
+
+  // Create a Log file to output run-time messages and output the git hash and
+  // macro flags used
   Create_Log_File(P);
   std::string message = "Git Commit Hash = " + std::string(GIT_HASH);
-  Write_Message_To_Log_File( message.c_str() );
+  Write_Message_To_Log_File(message.c_str());
   message = "Macro Flags     = " + std::string(MACRO_FLAGS);
-  Write_Message_To_Log_File( message.c_str() );
-
-
+  Write_Message_To_Log_File(message.c_str());
 
   // initialize the grid
   G.Initialize(&P);
   chprintf("Local number of grid cells: %d %d %d %d\n", G.H.nx_real, G.H.ny_real, G.H.nz_real, G.H.n_cells);
 
   message = "Initializing Simulation";
-  Write_Message_To_Log_File( message.c_str() );
+  Write_Message_To_Log_File(message.c_str());
 
-  // Set initial conditions and calculate first dt
+  // Set initial conditions
   chprintf("Setting initial conditions...\n");
   G.Set_Initial_Conditions(P);
   chprintf("Initial conditions set.\n");
-  // set main variables for Read_Grid initial conditions
-  if (strcmp(P.init, "Read_Grid") == 0) {
-    dti = C_cfl / G.H.dt;
+  // set main variables for Read_Grid and Read_Grid_Cat initial conditions
+  if (is_restart) {
     outtime += G.H.t;
     nfile = P.nfile;
   }
 
-  #ifdef DE
-  chprintf("\nUsing Dual Energy Formalism:\n eta_1: %0.3f   eta_2: %0.4f\n", DE_ETA_1, DE_ETA_2 );
-  message =  " eta_1: " + std::to_string(DE_ETA_1) + "   eta_2: " + std::to_string(DE_ETA_2);
-  Write_Message_To_Log_File( message.c_str() );
-  #endif
+#ifdef DE
+  chprintf("\nUsing Dual Energy Formalism:\n eta_1: %0.3f   eta_2: %0.4f\n", DE_ETA_1, DE_ETA_2);
+  message = " eta_1: " + std::to_string(DE_ETA_1) + "   eta_2: " + std::to_string(DE_ETA_2);
+  Write_Message_To_Log_File(message.c_str());
+#endif
 
-  #ifdef CPU_TIME
+#ifdef CPU_TIME
   G.Timer.Initialize();
-  #endif
+#endif
 
-  #ifdef GRAVITY
+#ifdef GRAVITY
   G.Initialize_Gravity(&P);
-  #endif
+#endif
 
-  #ifdef PARTICLES
+#ifdef PARTICLES
   G.Initialize_Particles(&P);
-  #endif
+#endif
 
-  #ifdef COSMOLOGY
+#ifdef COSMOLOGY
   G.Initialize_Cosmology(&P);
-  #endif
+#endif
 
-  #ifdef COOLING_GRACKLE
+#ifdef COOLING_GRACKLE
   G.Initialize_Grackle(&P);
-  #endif
+#endif
 
-  #ifdef CHEMISTRY_GPU
+#ifdef CHEMISTRY_GPU
   G.Initialize_Chemistry(&P);
-  #endif
+#endif
 
-  #ifdef ANALYSIS
-  G.Initialize_Analysis_Module(&P);
-  if ( G.Analysis.Output_Now ) G.Compute_and_Output_Analysis(&P);
-  #endif
+#ifdef ANALYSIS
+  G.Initialize_AnalysisModule(&P);
+  if (G.Analysis.Output_Now) {
+    G.Compute_and_Output_Analysis(&P);
+  }
+#endif
+
+#if defined(SUPERNOVA) && defined(PARTICLE_AGE)
+  FeedbackAnalysis sn_analysis(G);
+  #ifdef MPI_CHOLLA
+  supernova::initState(&P, G.Particles.n_total_initial);
+  #else
+  supernova::initState(&P, G.Particles.n_local);
+  #endif  // MPI_CHOLLA
+#endif    // SUPERNOVA && PARTICLE_AGE
 
-  #ifdef GRAVITY
+#ifdef STAR_FORMATION
+  star_formation::Initialize(G);
+#endif
+
+#ifdef GRAVITY_ANALYTIC_COMP
+  G.Setup_Analytic_Potential(&P);
+#endif
+
+#ifdef GRAVITY
   // Get the gravitational potential for the first timestep
-  G.Compute_Gravitational_Potential( &P);
-  #endif
+  G.Compute_Gravitational_Potential(&P);
+#endif
 
-  // Set boundary conditions (assign appropriate values to ghost cells) for hydro and potential
+  // Set boundary conditions (assign appropriate values to ghost cells) for
+  // hydro and potential
   chprintf("Setting boundary conditions...\n");
   G.Set_Boundary_Conditions_Grid(P);
   chprintf("Boundary conditions set.\n");
 
-  #ifdef GRAVITY_ANALYTIC_COMP
-  // add analytic component to gravity potential.
-  G.Add_Analytic_Potential(&P);
-  #endif
+#ifdef GRAVITY_ANALYTIC_COMP
+  G.Add_Analytic_Potential();
+#endif
 
-  #ifdef PARTICLES
+#ifdef PARTICLES
   // Get the particles acceleration for the first timestep
   G.Get_Particles_Acceleration();
-  #endif
+#endif
 
   chprintf("Dimensions of each cell: dx = %f dy = %f dz = %f\n", G.H.dx, G.H.dy, G.H.dz);
-  chprintf("Ratio of specific heats gamma = %f\n",gama);
-  chprintf("Nstep = %d  Timestep = %f  Simulation time = %f\n", G.H.n_step, G.H.dt, G.H.t);
-
+  chprintf("Ratio of specific heats gamma = %f\n", gama);
+  chprintf("Nstep = %d  Simulation time = %f\n", G.H.n_step, G.H.t);
 
-  #ifdef OUTPUT
-  if (strcmp(P.init, "Read_Grid") != 0 || G.H.Output_Now ) {
+#ifdef OUTPUT
+  if (!is_restart || G.H.Output_Now) {
     // write the initial conditions to file
     chprintf("Writing initial conditions to file...\n");
-    WriteData(G, P, nfile);
+    Write_Data(G, P, nfile);
   }
   // add one to the output file count
   nfile++;
-  #endif //OUTPUT
+#endif  // OUTPUT
+
+#ifdef MHD
+  // Check that the initial magnetic field has zero divergence
+  mhd::checkMagneticDivergence(G);
+#endif  // MHD
+
   // increment the next output time
   outtime += P.outstep;
 
-  #ifdef CPU_TIME
-  stop_init = get_time();
-  init = stop_init - start_total;
+#ifdef CPU_TIME
+  stop_init = Get_Time();
+  init      = stop_init - start_total;
   #ifdef MPI_CHOLLA
   init_min = ReduceRealMin(init);
   init_max = ReduceRealMax(init);
@@ -178,34 +233,44 @@ int main(int argc, char *argv[])
   chprintf("Init  min: %9.4f  max: %9.4f  avg: %9.4f\n", init_min, init_max, init_avg);
   #else
   printf("Init %9.4f\n", init);
-  #endif //MPI_CHOLLA
-  #endif //CPU_TIME
+  #endif  // MPI_CHOLLA
+#endif    // CPU_TIME
 
   // Evolve the grid, one timestep at a time
   chprintf("Starting calculations.\n");
   message = "Starting calculations.";
-  Write_Message_To_Log_File( message.c_str() );
-  while (G.H.t < P.tout)
-  {
-    // get the start time
-    #ifdef CPU_TIME
+  Write_Message_To_Log_File(message.c_str());
+
+  // Compute inverse timestep for the first time
+  dti = G.Calc_Inverse_Timestep();
+
+  while (G.H.t < P.tout) {
+// get the start time
+#ifdef CPU_TIME
     G.Timer.Total.Start();
-    #endif //CPU_TIME
-    start_step = get_time();
+#endif  // CPU_TIME
+    start_step = Get_Time();
 
-    // calculate the timestep. Note: this computes the timestep ONLY on the
-    // first loop, on subsequent time steps it just calls the MPI_Allreduce to
-    // determine the global timestep
+    // calculate the timestep by calling MPI_Allreduce
     G.set_dt(dti);
 
-    if (G.H.t + G.H.dt > outtime) G.H.dt = outtime - G.H.t;
+    // adjust timestep based on the next available scheduled time
+    const Real next_scheduled_time = fmin(outtime, P.tout);
+    if (G.H.t + G.H.dt > next_scheduled_time) {
+      G.H.dt = next_scheduled_time - G.H.t;
+    }
+
+#if defined(SUPERNOVA) && defined(PARTICLE_AGE)
+    supernova::Cluster_Feedback(G, sn_analysis);
+#endif  // SUPERNOVA && PARTICLE_AGE
 
-    #ifdef PARTICLES
-    //Advance the particles KDK( first step ): Velocities are updated by 0.5*dt and positions are updated by dt
-    G.Advance_Particles( 1 );
-    //Transfer the particles that moved outside the local domain
+#ifdef PARTICLES
+    // Advance the particles KDK( first step ): Velocities are updated by 0.5*dt
+    // and positions are updated by dt
+    G.Advance_Particles(1);
+    // Transfer the particles that moved outside the local domain
     G.Transfer_Particles_Boundaries(P);
-    #endif
+#endif
 
     // Advance the grid by one timestep
     dti = G.Update_Hydro_Grid();
@@ -213,112 +278,120 @@ int main(int argc, char *argv[])
     // update the simulation time ( t += dt )
     G.Update_Time();
 
-
-    #ifdef GRAVITY
-    //Compute Gravitational potential for next step
-    G.Compute_Gravitational_Potential( &P);
-    #endif
+#ifdef GRAVITY
+    // Compute Gravitational potential for next step
+    G.Compute_Gravitational_Potential(&P);
+#endif
 
     // add one to the timestep count
     G.H.n_step++;
 
-    //Set the Grid boundary conditions for next time step
+    // Set the Grid boundary conditions for next time step
     G.Set_Boundary_Conditions_Grid(P);
 
-    #ifdef GRAVITY_ANALYTIC_COMP
-    // add analytic component to gravity potential.
-    G.Add_Analytic_Potential(&P);
-    #endif
+#ifdef GRAVITY_ANALYTIC_COMP
+    G.Add_Analytic_Potential();
+#endif
 
-    #ifdef PARTICLES
-    ///Advance the particles KDK( second step ): Velocities are updated by 0.5*dt using the Accelerations at the new positions
-    G.Advance_Particles( 2 );
-    #endif
+#ifdef PARTICLES
+    /// Advance the particles KDK( second step ): Velocities are updated by
+    /// 0.5*dt using the Accelerations at the new positions
+    G.Advance_Particles(2);
+#endif
 
-    #ifdef PARTICLE_AGE
-    //G.Cluster_Feedback();
-    #endif
+#ifdef STAR_FORMATION
+    star_formation::Star_Formation(G);
+#endif
 
-    #ifdef CPU_TIME
+#ifdef CPU_TIME
+    cuda_utilities::Print_GPU_Memory_Usage();
     G.Timer.Total.End();
-    #endif //CPU_TIME
+#endif  // CPU_TIME
 
-    #ifdef CPU_TIME
+#ifdef CPU_TIME
     G.Timer.Print_Times();
-    #endif
+#endif
 
     // get the time to compute the total timestep
-    stop_step = get_time();
-    stop_total = get_time();
-    G.H.t_wall = stop_total-start_total;
-    #ifdef MPI_CHOLLA
+    stop_step  = Get_Time();
+    stop_total = Get_Time();
+    G.H.t_wall = stop_total - start_total;
+#ifdef MPI_CHOLLA
     G.H.t_wall = ReduceRealMax(G.H.t_wall);
-    #endif
-    chprintf("n_step: %d   sim time: %10.7f   sim timestep: %7.4e  timestep time = %9.3f ms   total time = %9.4f s\n\n",
-      G.H.n_step, G.H.t, G.H.dt, (stop_step-start_step)*1000, G.H.t_wall);
+#endif
+    chprintf(
+        "n_step: %d   sim time: %10.7f   sim timestep: %7.4e  timestep time = "
+        "%9.3f ms   total time = %9.4f s\n\n",
+        G.H.n_step, G.H.t, G.H.dt, (stop_step - start_step) * 1000, G.H.t_wall);
 
-    #ifdef OUTPUT_ALWAYS
-    G.H.Output_Now = true;
-    #endif
+    if (P.output_always) G.H.Output_Now = true;
 
-    #ifdef ANALYSIS
-    if ( G.Analysis.Output_Now ) G.Compute_and_Output_Analysis(&P);
-    #endif
+#ifdef ANALYSIS
+    if (G.Analysis.Output_Now) {
+      G.Compute_and_Output_Analysis(&P);
+    }
+  #if defined(SUPERNOVA) && defined(PARTICLE_AGE)
+    sn_analysis.Compute_Gas_Velocity_Dispersion(G);
+  #endif
+#endif
 
-    // if ( P.n_steps_output > 0 && G.H.n_step % P.n_steps_output == 0) G.H.Output_Now = true;
+    // if ( P.n_steps_output > 0 && G.H.n_step % P.n_steps_output == 0)
+    // G.H.Output_Now = true;
 
-    if (G.H.t == outtime || G.H.Output_Now )
-    {
-      #ifdef OUTPUT
+    if (G.H.t == outtime || G.H.Output_Now) {
+#ifdef OUTPUT
       /*output the grid data*/
-      WriteData(G, P, nfile);
+      Write_Data(G, P, nfile);
       // add one to the output file count
       nfile++;
-      #endif //OUTPUT
-      // update to the next output time
-      outtime += P.outstep;
+#endif  // OUTPUT
+      if (G.H.t == outtime) {
+        outtime += P.outstep;  // update to the next output time
+      }
     }
 
-    #ifdef CPU_TIME
+#ifdef CPU_TIME
     G.Timer.n_steps += 1;
-    #endif
+#endif
 
-    #ifdef N_STEPS_LIMIT
+#ifdef N_STEPS_LIMIT
     // Exit the loop when reached the limit number of steps (optional)
-    if ( G.H.n_step == N_STEPS_LIMIT) {
-      WriteData(G, P, nfile);
+    if (G.H.n_step == N_STEPS_LIMIT) {
+  #ifdef OUTPUT
+      Write_Data(G, P, nfile);
+  #endif  // OUTPUT
       break;
     }
-    #endif
-
+#endif
 
-    #ifdef COSMOLOGY
+#ifdef COSMOLOGY
     // Exit the loop when reached the last scale_factor output
-    if ( G.Cosmo.exit_now ) {
-      chprintf( "\nReached Last Cosmological Output: Ending Simulation\n");
+    if (G.Cosmo.exit_now) {
+      chprintf("\nReached Last Cosmological Output: Ending Simulation\n");
       break;
     }
-    #endif
-
-
-  } /*end loop over timesteps*/
+#endif
 
+#ifdef MHD
+    // Check that the magnetic field has zero divergence
+    mhd::checkMagneticDivergence(G);
+#endif  // MHD
+  }     /*end loop over timesteps*/
 
-  #ifdef CPU_TIME
+#ifdef CPU_TIME
   // Print timing statistics
-  G.Timer.Print_Average_Times( P );
-  #endif
+  G.Timer.Print_Average_Times(P);
+#endif
 
   message = "Simulation completed successfully.";
-  Write_Message_To_Log_File( message.c_str() );
+  Write_Message_To_Log_File(message.c_str());
 
   // free the grid
   G.Reset();
 
-  #ifdef MPI_CHOLLA
+#ifdef MPI_CHOLLA
   MPI_Finalize();
-  #endif /*MPI_CHOLLA*/
+#endif /*MPI_CHOLLA*/
 
   return 0;
-
 }
diff --git a/src/main_tests.cpp b/src/main_tests.cpp
index 29e56b496..4600e190d 100644
--- a/src/main_tests.cpp
+++ b/src/main_tests.cpp
@@ -6,10 +6,10 @@
  */
 
 // STL includes
-#include <string>
 #include <algorithm>
-#include <vector>
 #include <stdexcept>
+#include <string>
+#include <vector>
 
 // External Libraries and Headers
 #include <gtest/gtest.h>
@@ -18,85 +18,81 @@
 #include "utils/testing_utilities.h"
 
 /// This is the global variable to store the path to the root of Cholla
-testingUtilities::GlobalString globalChollaRoot;
-testingUtilities::GlobalString globalChollaBuild;
-testingUtilities::GlobalString globalChollaMachine;
-testingUtilities::GlobalString globalMpiLauncher;
+testing_utilities::GlobalString globalChollaRoot;
+testing_utilities::GlobalString globalChollaBuild;
+testing_utilities::GlobalString globalChollaMachine;
+testing_utilities::GlobalString globalMpiLauncher;
 bool globalRunCholla;
 bool globalCompareSystemTestResults;
 
-
 /*!
  * \brief Class for parsing input flags. Modified from
  * https://stackoverflow.com/questions/865668/parsing-command-line-arguments-in-c
  *
  */
-class InputParser{
-    public:
-        // =====================================================================
-        /*!
-         * \brief Get the option that follows the given flag. Also checks that
-         * the flag exists and is not empty
-         *
-         * \param option The string option to look for
-         * \return const std::string& The option the follows a given flag
-         */
-        const std::string& getCmdOption(const std::string &option) const
-        {
-            // First check that the option exists
-            if(not cmdOptionExists(option))
-            {
-                std::string errMessage = "Error: argument '" + option + "' not found. ";
-                throw std::invalid_argument(errMessage);
-            }
+class InputParser
+{
+ public:
+  // =====================================================================
+  /*!
+   * \brief Get the option that follows the given flag. Also checks that
+   * the flag exists and is not empty
+   *
+   * \param option The string option to look for
+   * \return const std::string& The option the follows a given flag
+   */
+  const std::string &Get_Cmd_Option(const std::string &option) const
+  {
+    // First check that the option exists
+    if (not Cmd_Option_Exists(option)) {
+      std::string errMessage = "Error: argument '" + option + "' not found. ";
+      throw std::invalid_argument(errMessage);
+    }
 
-            std::vector<std::string>::const_iterator itr;
-            itr =  std::find(this->_tokens.begin(), this->_tokens.end(), option);
-            if (itr != this->_tokens.end() && ++itr != this->_tokens.end())
-            {
-                return *itr;
-            }
-            else
-            {
-                std::string errMessage = "Error: empty argument '" + option + "'";
-                throw std::invalid_argument(errMessage);
-            }
-        }
-        // =====================================================================
+    std::vector<std::string>::const_iterator itr;
+    itr = std::find(this->_tokens.begin(), this->_tokens.end(), option);
+    if (itr != this->_tokens.end() && ++itr != this->_tokens.end()) {
+      return *itr;
+    } else {
+      std::string errMessage = "Error: empty argument '" + option + "'";
+      throw std::invalid_argument(errMessage);
+    }
+  }
+  // =====================================================================
 
-        // =====================================================================
-        /*!
-         * \brief Checks that an option exists. Returns True if it exists and
-         * False otherwise
-         *
-         * \param option The option flag to search for
-         * \return true The option flag exists in argv
-         * \return false The option flage does not exist in argv
-         */
-        bool cmdOptionExists(const std::string &option) const
-        {
-            return std::find(this->_tokens.begin(), this->_tokens.end(), option)
-            != this->_tokens.end();
-        }
-        // =====================================================================
+  // =====================================================================
+  /*!
+   * \brief Checks that an option exists. Returns True if it exists and
+   * False otherwise
+   *
+   * \param option The option flag to search for
+   * \return true The option flag exists in argv
+   * \return false The option flage does not exist in argv
+   */
+  bool Cmd_Option_Exists(const std::string &option) const
+  {
+    return std::find(this->_tokens.begin(), this->_tokens.end(), option) != this->_tokens.end();
+  }
+  // =====================================================================
 
-        // =====================================================================
-        // constructor and destructor
-        /*!
-         * \brief Construct a new Input Parser object
-         *
-         * \param argc argc from main
-         * \param argv argv from main
-         */
-        InputParser (int &argc, char **argv)
-        {
-            for (int i=1; i < argc; ++i)
-                this->_tokens.push_back(std::string(argv[i]));
-        }
-        ~InputParser() = default;
-        // =====================================================================
-    private:
-        std::vector <std::string> _tokens;
+  // =====================================================================
+  // constructor and destructor
+  /*!
+   * \brief Construct a new Input Parser object
+   *
+   * \param argc argc from main
+   * \param argv argv from main
+   */
+  InputParser(int &argc, char **argv)
+  {
+    for (int i = 1; i < argc; ++i) {
+      this->_tokens.emplace_back(argv[i]);
+    }
+  }
+  ~InputParser() = default;
+  // =====================================================================
+ private:
+  std::vector<std::string> _tokens;
 };
 
 /*!
@@ -111,48 +107,30 @@ class InputParser{
  */
 int main(int argc, char **argv)
 {
-    // First we initialize Googletest. Note, this removes all gtest related
-    // arguments from argv and argc
-    ::testing::InitGoogleTest(&argc, argv);
-
-    // Make sure death tests are threadsafe. This is potentially much slower than
-    // using "fast" instead of "threadsafe" but it makes sure tests are threadsafe
-    // in a multithreaded environment. If the performance becomes an issue we can
-    // try "fast", it can also be set on a test by test basis
-    ::testing::GTEST_FLAG(death_test_style) = "threadsafe";
+  // First we initialize Googletest. Note, this removes all gtest related
+  // arguments from argv and argc
+  ::testing::InitGoogleTest(&argc, argv);
 
-    // Initialize global variables
-    InputParser input(argc, argv);
-    globalChollaRoot.init(input.getCmdOption("--cholla-root"));
-    globalChollaBuild.init(input.getCmdOption("--build-type"));
-    globalChollaMachine.init(input.getCmdOption("--machine"));
-    if (input.cmdOptionExists("--mpi-launcher"))
-    {
-        globalMpiLauncher.init(input.getCmdOption("--mpi-launcher"));
-    }
-    else
-    {
-        globalMpiLauncher.init("mpirun -np");
-    }
+  // Make sure death tests are threadsafe. This is potentially much slower than
+  // using "fast" instead of "threadsafe" but it makes sure tests are threadsafe
+  // in a multithreaded environment. If the performance becomes an issue we can
+  // try "fast", it can also be set on a test by test basis
+  ::testing::GTEST_FLAG(death_test_style) = "threadsafe";
 
-    if (input.cmdOptionExists("--runCholla=false"))
-    {
-        globalRunCholla = false;
-    }
-    else
-    {
-        globalRunCholla = true;
-    }
+  // Initialize global variables
+  InputParser input(argc, argv);
+  globalChollaRoot.init(input.Get_Cmd_Option("--cholla-root"));
+  globalChollaBuild.init(input.Get_Cmd_Option("--build-type"));
+  globalChollaMachine.init(input.Get_Cmd_Option("--machine"));
+  if (input.Cmd_Option_Exists("--mpi-launcher")) {
+    globalMpiLauncher.init(input.Get_Cmd_Option("--mpi-launcher"));
+  } else {
+    globalMpiLauncher.init("mpirun -np");
+  }
 
-    if (input.cmdOptionExists("--compareSystemTestResults=false"))
-    {
-        globalCompareSystemTestResults = false;
-    }
-    else
-    {
-        globalCompareSystemTestResults = true;
-    }
+  globalRunCholla                = not input.Cmd_Option_Exists("--runCholla=false");
+  globalCompareSystemTestResults = not input.Cmd_Option_Exists("--compareSystemTestResults=false");
 
-    // Run test and return result
-    return RUN_ALL_TESTS();
+  // Run test and return result
+  return RUN_ALL_TESTS();
 }
diff --git a/src/mhd/ct_electric_fields.cu b/src/mhd/ct_electric_fields.cu
new file mode 100644
index 000000000..f061edeb7
--- /dev/null
+++ b/src/mhd/ct_electric_fields.cu
@@ -0,0 +1,282 @@
+/*!
+ * \file ct_electric_fields.cu
+ * \author Robert 'Bob' Caddy (rvc@pitt.edu)
+ * \brief Contains implementation for the CT electric fields code. Method from
+ * Stone & Gardiner 2009 "A simple unsplit Godunov method for multidimensional
+ * MHD" hereafter referred to as "S&G 2009"
+ *
+ */
+
+// STL Includes
+
+// External Includes
+
+// Local Includes
+#include "../mhd/ct_electric_fields.h"
+#ifdef MHD
+namespace mhd
+{
+// =========================================================================
+__global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *fluxY, Real const *fluxZ,
+                                             Real const *dev_conserved, Real *ctElectricFields, int const nx,
+                                             int const ny, int const nz, int const n_cells)
+{
+  // get a thread index
+  int const threadId = threadIdx.x + blockIdx.x * blockDim.x;
+  int xid, yid, zid;
+  cuda_utilities::compute3DIndices(threadId, nx, ny, xid, yid, zid);
+
+  // Thread guard to avoid overrun and to skip the first two cells since
+  // those ghost cells can't be reconstructed
+  if (xid > 0 and yid > 0 and zid > 0 and xid < nx and yid < ny and zid < nz) {
+    // According to Stone et al. 2008 section 5.3 and the source code of
+    // Athena, the following equation relate the magnetic flux to the
+    // face centered electric fields/EMF. -cross(V,B)x is the negative
+    // of the x-component of V cross B. Note that "X" is the direction
+    // the solver is running in this case, not necessarily the true "X".
+    //  F_x[(grid_enum::fluxX_magnetic_z)*n_cells] = VxBy - BxVy =
+    //  -(-cross(V,B))z = -EMF_Z F_x[(grid_enum::fluxX_magnetic_y)*n_cells] =
+    //  VxBz - BxVz =  (-cross(V,B))y =  EMF_Y
+    //  F_y[(grid_enum::fluxY_magnetic_x)*n_cells] = VxBy - BxVy =
+    //  -(-cross(V,B))z = -EMF_X F_y[(grid_enum::fluxY_magnetic_z)*n_cells] =
+    //  VxBz - BxVz =  (-cross(V,B))y =  EMF_Z
+    //  F_z[(grid_enum::fluxZ_magnetic_y)*n_cells] = VxBy - BxVy =
+    //  -(-cross(V,B))z = -EMF_Y F_z[(grid_enum::fluxZ_magnetic_x)*n_cells] =
+    //  VxBz - BxVz =  (-cross(V,B))y =  EMF_X
+
+    // Notes on Implementation Details
+    // - The density flux has the same sign as the velocity on the face
+    //   and we only care about the sign so we're using the density flux
+    //   to perform upwinding checks
+    // - All slopes are computed without the factor of two shown in
+    //   Stone & Gardiner 2008 eqn. 24. That factor of two is taken care
+    //   of in the final assembly of the electric field
+
+    // Variable to get the sign of the velocity at the interface.
+    Real signUpwind;
+
+    // Slope and face variables. Format is
+    // "<slope/face>_<direction>_<pos/neg>". Slope/Face indicates if the
+    // value is a slope or a face centered EMF, direction indicates the
+    // direction of the derivative/face and pos/neg indicates if it's
+    // the slope on the positive or negative side of the edge field
+    // being computed. Note that the direction for the face is parallel
+    // to the face and the other direction that is parallel to that face
+    // is the direction of the electric field being calculated
+    Real slope_x_pos, slope_x_neg, slope_y_pos, slope_y_neg, slope_z_pos, slope_z_neg, face_x_pos, face_x_neg,
+        face_y_pos, face_y_neg, face_z_pos, face_z_neg;
+    // ================
+    // X electric field
+    // ================
+
+    // Y-direction slope on the positive Y side. S&G 2009 equation 23
+    signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny) + grid_enum::density * n_cells];
+    if (signUpwind > 0.0) {
+      slope_y_pos = mhd::internal::_ctSlope(fluxY, dev_conserved, -1, 0, 2, -1, 1, 2, xid, yid, zid, nx, ny, n_cells);
+    } else if (signUpwind < 0.0) {
+      slope_y_pos = mhd::internal::_ctSlope(fluxY, dev_conserved, -1, 0, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells);
+    } else {
+      slope_y_pos =
+          0.5 * (mhd::internal::_ctSlope(fluxY, dev_conserved, -1, 0, 2, -1, 1, 2, xid, yid, zid, nx, ny, n_cells) +
+                 mhd::internal::_ctSlope(fluxY, dev_conserved, -1, 0, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells));
+    }
+
+    // Y-direction slope on the negative Y side. S&G 2009 equation 23
+    signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny) + grid_enum::density * n_cells];
+    if (signUpwind > 0.0) {
+      slope_y_neg = mhd::internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells);
+    } else if (signUpwind < 0.0) {
+      slope_y_neg = mhd::internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells);
+    } else {
+      slope_y_neg =
+          0.5 * (mhd::internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells) +
+                 mhd::internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells));
+    }
+
+    // Z-direction slope on the positive Z side. S&G 2009 equation 23
+    signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny) + grid_enum::density * n_cells];
+    if (signUpwind > 0.0) {
+      slope_z_pos = mhd::internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, -1, 1, 2, xid, yid, zid, nx, ny, n_cells);
+    } else if (signUpwind < 0.0) {
+      slope_z_pos = mhd::internal::_ctSlope(fluxZ, dev_conserved, 1, 0, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells);
+    } else {
+      slope_z_pos =
+          0.5 * (mhd::internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, -1, 1, 2, xid, yid, zid, nx, ny, n_cells) +
+                 mhd::internal::_ctSlope(fluxZ, dev_conserved, 1, 0, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells));
+    }
+
+    // Z-direction slope on the negative Z side. S&G 2009 equation 23
+    signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny) + grid_enum::density * n_cells];
+    if (signUpwind > 0.0) {
+      slope_z_neg = mhd::internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells);
+    } else if (signUpwind < 0.0) {
+      slope_z_neg = mhd::internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 2, -1, -1, 2, xid, yid, zid, nx, ny, n_cells);
+    } else {
+      slope_z_neg =
+          0.5 * (mhd::internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells) +
+                 mhd::internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 2, -1, -1, 2, xid, yid, zid, nx, ny, n_cells));
+    }
+
+    // Load the face centered electric fields  Note the negative signs to
+    // convert from magnetic flux to electric field
+
+    face_y_pos =
+        +fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny) + (grid_enum::fluxZ_magnetic_x)*n_cells];
+    face_y_neg =
+        +fluxZ[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny) + (grid_enum::fluxZ_magnetic_x)*n_cells];
+    face_z_pos =
+        -fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny) + (grid_enum::fluxY_magnetic_x)*n_cells];
+    face_z_neg =
+        -fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny) + (grid_enum::fluxY_magnetic_x)*n_cells];
+
+    // sum and average face centered electric fields and slopes to get the
+    // edge averaged electric field.
+    // S&G 2009 equation 22
+    ctElectricFields[threadId + grid_enum::ct_elec_x * n_cells] =
+        0.25 *
+        (+face_y_pos + face_y_neg + face_z_pos + face_z_neg + slope_y_pos + slope_y_neg + slope_z_pos + slope_z_neg);
+
+    // ================
+    // Y electric field
+    // ================
+
+    // X-direction slope on the positive X side. S&G 2009 equation 23
+    signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny) + grid_enum::density * n_cells];
+    if (signUpwind > 0.0) {
+      slope_x_pos = mhd::internal::_ctSlope(fluxX, dev_conserved, 1, 1, 2, -1, 0, 2, xid, yid, zid, nx, ny, n_cells);
+    } else if (signUpwind < 0.0) {
+      slope_x_pos = mhd::internal::_ctSlope(fluxX, dev_conserved, 1, 1, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells);
+    } else {
+      slope_x_pos =
+          0.5 * (mhd::internal::_ctSlope(fluxX, dev_conserved, 1, 1, 2, -1, 0, 2, xid, yid, zid, nx, ny, n_cells) +
+                 mhd::internal::_ctSlope(fluxX, dev_conserved, 1, 1, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells));
+    }
+
+    // X-direction slope on the negative X side. S&G 2009 equation 23
+    signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny) + grid_enum::density * n_cells];
+    if (signUpwind > 0.0) {
+      slope_x_neg = mhd::internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells);
+    } else if (signUpwind < 0.0) {
+      slope_x_neg = mhd::internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells);
+    } else {
+      slope_x_neg =
+          0.5 * (mhd::internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells) +
+                 mhd::internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells));
+    }
+
+    // Z-direction slope on the positive Z side. S&G 2009 equation 23
+    signUpwind = fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny) + grid_enum::density * n_cells];
+    if (signUpwind > 0.0) {
+      slope_z_pos = mhd::internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, -1, 0, 2, xid, yid, zid, nx, ny, n_cells);
+    } else if (signUpwind < 0.0) {
+      slope_z_pos = mhd::internal::_ctSlope(fluxZ, dev_conserved, -1, 1, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells);
+    } else {
+      slope_z_pos =
+          0.5 * (mhd::internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, -1, 0, 2, xid, yid, zid, nx, ny, n_cells) +
+                 mhd::internal::_ctSlope(fluxZ, dev_conserved, -1, 1, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells));
+    }
+
+    // Z-direction slope on the negative Z side. S&G 2009 equation 23
+    signUpwind = fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny) + grid_enum::density * n_cells];
+    if (signUpwind > 0.0) {
+      slope_z_neg = mhd::internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells);
+    } else if (signUpwind < 0.0) {
+      slope_z_neg = mhd::internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 2, -1, 2, -1, xid, yid, zid, nx, ny, n_cells);
+    } else {
+      slope_z_neg =
+          0.5 * (mhd::internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells) +
+                 mhd::internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 2, -1, 2, -1, xid, yid, zid, nx, ny, n_cells));
+    }
+
+    // Load the face centered electric fields  Note the negative signs to
+    // convert from magnetic flux to electric field
+    face_x_pos =
+        -fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny) + (grid_enum::fluxZ_magnetic_y)*n_cells];
+    face_x_neg =
+        -fluxZ[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny) + (grid_enum::fluxZ_magnetic_y)*n_cells];
+    face_z_pos =
+        +fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny) + (grid_enum::fluxX_magnetic_y)*n_cells];
+    face_z_neg =
+        +fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny) + (grid_enum::fluxX_magnetic_y)*n_cells];
+
+    // sum and average face centered electric fields and slopes to get the
+    // edge averaged electric field.
+    // S&G 2009 equation 22
+    ctElectricFields[threadId + grid_enum::ct_elec_y * n_cells] =
+        0.25 *
+        (+face_x_pos + face_x_neg + face_z_pos + face_z_neg + slope_x_pos + slope_x_neg + slope_z_pos + slope_z_neg);
+
+    // ================
+    // Z electric field
+    // ================
+
+    // Y-direction slope on the positive Y side. S&G 2009 equation 23
+    signUpwind = fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny) + grid_enum::density * n_cells];
+    if (signUpwind > 0.0) {
+      slope_y_pos = mhd::internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, -1, 0, 1, xid, yid, zid, nx, ny, n_cells);
+    } else if (signUpwind < 0.0) {
+      slope_y_pos = mhd::internal::_ctSlope(fluxY, dev_conserved, 1, 2, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells);
+    } else {
+      slope_y_pos =
+          0.5 * (mhd::internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, -1, 0, 1, xid, yid, zid, nx, ny, n_cells) +
+                 mhd::internal::_ctSlope(fluxY, dev_conserved, 1, 2, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells));
+    }
+
+    // Y-direction slope on the negative Y side. S&G 2009 equation 23
+    signUpwind = fluxX[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny) + grid_enum::density * n_cells];
+    if (signUpwind > 0.0) {
+      slope_y_neg = mhd::internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells);
+    } else if (signUpwind < 0.0) {
+      slope_y_neg = mhd::internal::_ctSlope(fluxY, dev_conserved, 1, 2, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells);
+    } else {
+      slope_y_neg =
+          0.5 * (mhd::internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells) +
+                 mhd::internal::_ctSlope(fluxY, dev_conserved, 1, 2, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells));
+    }
+
+    // X-direction slope on the positive X side. S&G 2009 equation 23
+    signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny) + grid_enum::density * n_cells];
+    if (signUpwind > 0.0) {
+      slope_x_pos = mhd::internal::_ctSlope(fluxX, dev_conserved, -1, 2, 1, -1, 0, 1, xid, yid, zid, nx, ny, n_cells);
+    } else if (signUpwind < 0.0) {
+      slope_x_pos = mhd::internal::_ctSlope(fluxX, dev_conserved, -1, 2, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells);
+    } else {
+      slope_x_pos =
+          0.5 * (mhd::internal::_ctSlope(fluxX, dev_conserved, -1, 2, 1, -1, 0, 1, xid, yid, zid, nx, ny, n_cells) +
+                 mhd::internal::_ctSlope(fluxX, dev_conserved, -1, 2, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells));
+    }
+
+    // X-direction slope on the negative X side. S&G 2009 equation 23
+    signUpwind = fluxY[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny) + grid_enum::density * n_cells];
+    if (signUpwind > 0.0) {
+      slope_x_neg = mhd::internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells);
+    } else if (signUpwind < 0.0) {
+      slope_x_neg = mhd::internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells);
+    } else {
+      slope_x_neg =
+          0.5 * (mhd::internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells) +
+                 mhd::internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells));
+    }
+
+    // Load the face centered electric fields  Note the negative signs to
+    // convert from magnetic flux to electric field
+    face_x_pos =
+        +fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny) + (grid_enum::fluxY_magnetic_z)*n_cells];
+    face_x_neg =
+        +fluxY[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny) + (grid_enum::fluxY_magnetic_z)*n_cells];
+    face_y_pos =
+        -fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny) + (grid_enum::fluxX_magnetic_z)*n_cells];
+    face_y_neg =
+        -fluxX[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny) + (grid_enum::fluxX_magnetic_z)*n_cells];
+
+    // sum and average face centered electric fields and slopes to get the
+    // edge averaged electric field.
+    // S&G 2009 equation 22
+    ctElectricFields[threadId + grid_enum::ct_elec_z * n_cells] =
+        0.25 *
+        (+face_x_pos + face_x_neg + face_y_pos + face_y_neg + slope_x_pos + slope_x_neg + slope_y_pos + slope_y_neg);
+  }
+}
+// =========================================================================
+}  // end namespace mhd
+#endif  // MHD
diff --git a/src/mhd/ct_electric_fields.h b/src/mhd/ct_electric_fields.h
new file mode 100644
index 000000000..c151f5bd0
--- /dev/null
+++ b/src/mhd/ct_electric_fields.h
@@ -0,0 +1,144 @@
+/*!
+ * \file ct_electric_fields.h
+ * \author Robert 'Bob' Caddy (rvc@pitt.edu)
+ * \brief Contains the declaration for the kernel that computes the CT electric
+ * fields. Method from Stone & Gardiner 2009 "A simple unsplit Godunov method
+ * for multidimensional MHD" hereafter referred to as "S&G 2009"
+ *
+ */
+
+#pragma once
+
+// STL Includes
+
+// External Includes
+
+// Local Includes
+#include "../global/global.h"
+#include "../global/global_cuda.h"
+#include "../utils/cuda_utilities.h"
+#include "../utils/gpu.hpp"
+
+#ifdef MHD
+namespace mhd
+{
+/*!
+ * \brief Namespace for functions required by functions within the mhd
+ * namespace. Everything in this name space should be regarded as private
+ * but is made accesible for testing
+ *
+ */
+namespace internal
+{
+// =====================================================================
+/*!
+ * \brief Compute and return the slope of the electric field used to
+ * compute the CT electric fields. This function implements S&G 2009
+ * equation 24
+ *
+ * \param[in] flux The flux array
+ * \param[in] dev_conserved The conserved variable array
+ * \param[in] fluxSign The sign of the flux to convert it to magnetic
+ * field. Also serves to choose which magnetic flux is used, i.e. the Y
+ * or Z field
+ * \param[in] ctDirection The direction of the CT field that this slope
+   will be used to calculate
+ * \param[in] conservedQuadrent1 Which index should be reduced by one to get the
+ correct conserved variables. Options are -1 for no reduction, 0 for reducing
+ xid, 1 for reducing yid, and 2 for reducing zid
+ * \param[in] conservedQuadrent2 Which index should be reduced by one to get the
+ correct conserved variables. Options are -1 for no reduction, 0 for reducing
+ xid, 1 for reducing yid, and 2 for reducing zid
+ * \param[in] fluxQuadrent1 Which index should be reduced by one to get the
+ correct flux variable. Options are -1 for no reduction, 0 for reducing xid, 1
+ for reducing yid, and 2 for reducing zid
+ * \param[in] fluxQuadrent2 Which index should be reduced by one to get the
+ correct flux variable. Options are -1 for no reduction, 0 for reducing xid, 1
+ for reducing yid, and 2 for reducing zid
+ * \param[in] xid The x index
+ * \param[in] yid The y index
+ * \param[in] zid The z index
+ * \param[in] nx The number of cells in the x-direction
+ * \param[in] ny The number of cells in the y-direction
+ * \param[in] n_cells The total number of cells
+ * \return Real The slope of the electric field
+ */
+inline __host__ __device__ Real _ctSlope(Real const *flux, Real const *dev_conserved, Real const &fluxSign,
+                                         int const &ctDirection, int const &conservedQuadrent1,
+                                         int const &conservedQuadrent2, int const &fluxQuadrent1,
+                                         int const &fluxQuadrent2, int const &xid, int const &yid, int const &zid,
+                                         int const &nx, int const &ny, int const &n_cells)
+{
+  // Compute the various required indices
+
+  // Get the shifted modulos of the ctDirection.
+  int const modPlus1 = (ctDirection + 1) % 3;
+  int const modPlus2 = (ctDirection + 2) % 3;
+
+  // Indices for the cell centered values
+  int const xidCentered = xid - int(conservedQuadrent1 == 0) - int(conservedQuadrent2 == 0);
+  int const yidCentered = yid - int(conservedQuadrent1 == 1) - int(conservedQuadrent2 == 1);
+  int const zidCentered = zid - int(conservedQuadrent1 == 2) - int(conservedQuadrent2 == 2);
+  int const idxCentered = cuda_utilities::compute1DIndex(xidCentered, yidCentered, zidCentered, nx, ny);
+
+  // Index for the flux
+  int const idxFlux = cuda_utilities::compute1DIndex(xid - int(fluxQuadrent1 == 0) - int(fluxQuadrent2 == 0),
+                                                     yid - int(fluxQuadrent1 == 1) - int(fluxQuadrent2 == 1),
+                                                     zid - int(fluxQuadrent1 == 2) - int(fluxQuadrent2 == 2), nx, ny);
+
+  // Indices for the face centered magnetic fields that need to be averaged
+  int const idxB2Shift = cuda_utilities::compute1DIndex(
+      xidCentered - int(modPlus1 == 0), yidCentered - int(modPlus1 == 1), zidCentered - int(modPlus1 == 2), nx, ny);
+  int const idxB3Shift = cuda_utilities::compute1DIndex(
+      xidCentered - int(modPlus2 == 0), yidCentered - int(modPlus2 == 1), zidCentered - int(modPlus2 == 2), nx, ny);
+
+  // Load values for cell centered electric field. B1 (not present) is
+  // the magnetic field in the same direction as the `ctDirection`
+  // variable, B2 and B3 are the next two fields cyclically. i.e. if
+  // B1=Bx then B2=By and B3=Bz, if B1=By then B2=Bz and B3=Bx. The
+  // same rules apply for the momentum
+  Real const density    = dev_conserved[idxCentered + grid_enum::density * n_cells];
+  Real const Momentum2  = dev_conserved[idxCentered + (modPlus1 + grid_enum::momentum_x) * n_cells];
+  Real const Momentum3  = dev_conserved[idxCentered + (modPlus2 + grid_enum::momentum_x) * n_cells];
+  Real const B2Centered = 0.5 * (dev_conserved[idxCentered + (modPlus1 + grid_enum::magnetic_start) * n_cells] +
+                                 dev_conserved[idxB2Shift + (modPlus1 + grid_enum::magnetic_start) * n_cells]);
+  Real const B3Centered = 0.5 * (dev_conserved[idxCentered + (modPlus2 + grid_enum::magnetic_start) * n_cells] +
+                                 dev_conserved[idxB3Shift + (modPlus2 + grid_enum::magnetic_start) * n_cells]);
+
+  // Compute the electric field in the center with a cross product
+  Real const electric_centered = (Momentum3 * B2Centered - Momentum2 * B3Centered) / density;
+
+  // Load face centered electric field, note fluxSign to correctly do
+  // the shift from magnetic flux to EMF/electric field and to choose
+  // which field to use
+  Real const electric_face = fluxSign * flux[idxFlux + (int(fluxSign == 1) + grid_enum::magnetic_start) * n_cells];
+
+  // Compute the slope and return it
+  // S&G 2009 equation 24
+  return electric_face - electric_centered;
+}
+// =====================================================================
+}  // namespace internal
+
+// =========================================================================
+/*!
+ * \brief Compute the Constrained Transport electric fields used to evolve
+ * the magnetic field. Note that this function requires that the density be
+ * non-zero or it will return Nans.
+ *
+ * \param[in] fluxX The flux on the x+1/2 face of each cell
+ * \param[in] fluxY The flux on the y+1/2 face of each cell
+ * \param[in] fluxZ The flux on the z+1/2 face of each cell
+ * \param[in] dev_conserved The device resident grid
+ * \param[out] ctElectricFields The CT electric fields
+ * \param[in] nx The number of cells in the x-direction
+ * \param[in] ny The number of cells in the y-direction
+ * \param[in] nz The number of cells in the z-direction
+ * \param[in] n_cells The total number of cells
+ */
+__global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *fluxY, Real const *fluxZ,
+                                             Real const *dev_conserved, Real *ctElectricFields, int const nx,
+                                             int const ny, int const nz, int const n_cells);
+// =========================================================================
+}  // end  namespace mhd
+#endif  // MHD
\ No newline at end of file
diff --git a/src/mhd/ct_electric_fields_tests.cu b/src/mhd/ct_electric_fields_tests.cu
new file mode 100644
index 000000000..d3a8ea4dc
--- /dev/null
+++ b/src/mhd/ct_electric_fields_tests.cu
@@ -0,0 +1,274 @@
+/*!
+ * \file ct_electric_fields_tests.cu
+ * \author Robert 'Bob' Caddy (rvc@pitt.edu)
+ * \brief Tests for the CT electric fields
+ *
+ */
+
+// STL Includes
+#include <cmath>
+#include <iostream>
+#include <numeric>
+#include <string>
+#include <vector>
+
+// External Includes
+#include <gtest/gtest.h>  // Include GoogleTest and related libraries/headers
+
+// Local Includes
+#include "../global/global.h"
+#include "../io/io.h"
+#include "../mhd/ct_electric_fields.h"
+#include "../utils/testing_utilities.h"
+
+#ifdef MHD
+// =============================================================================
+// Tests for the mhd::Calculate_CT_Electric_Fields kernel
+// =============================================================================
+
+// =============================================================================
+/*!
+ * \brief Test fixture for tMHDCalculateCTElectricFields test suite
+ *
+ */
+// NOLINTNEXTLINE(readability-identifier-naming)
+class tMHDCalculateCTElectricFields : public ::testing::Test
+{
+ public:
+  /*!
+   * \brief Initialize and allocate all the various required variables and
+   * arrays
+   *
+   */
+  tMHDCalculateCTElectricFields()
+      : n_cells(nx * ny * nz),
+        fluxX(n_cells * (grid_enum::num_flux_fields)),
+        fluxY(n_cells * (grid_enum::num_flux_fields)),
+        fluxZ(n_cells * (grid_enum::num_flux_fields)),
+        grid(n_cells * (grid_enum::num_fields)),
+        testCTElectricFields(n_cells * 3, -999.),
+        fiducialData(n_cells * 3, -999.),
+        dimGrid((n_cells + TPB - 1) / TPB, 1, 1),
+        dimBlock(TPB, 1, 1)
+  {
+    // Allocate device arrays
+    GPU_Error_Check(cudaMalloc(&dev_fluxX, fluxX.size() * sizeof(double)));
+    GPU_Error_Check(cudaMalloc(&dev_fluxY, fluxY.size() * sizeof(double)));
+    GPU_Error_Check(cudaMalloc(&dev_fluxZ, fluxZ.size() * sizeof(double)));
+    GPU_Error_Check(cudaMalloc(&dev_grid, grid.size() * sizeof(double)));
+    GPU_Error_Check(cudaMalloc(&dev_testCTElectricFields, testCTElectricFields.size() * sizeof(double)));
+
+    // Populate the grids with values where vector.at(i) = double(i). The
+    // values chosen aren't that important, just that every cell has a unique
+    // value
+    std::iota(std::begin(fluxX), std::end(fluxX), 0.);
+    std::iota(std::begin(fluxY), std::end(fluxY), fluxX.back() + 1);
+    std::iota(std::begin(fluxZ), std::end(fluxZ), fluxY.back() + 1);
+    std::iota(std::begin(grid), std::end(grid), fluxZ.back() + 1);
+  }
+  ~tMHDCalculateCTElectricFields() = default;
+
+ protected:
+  // Initialize the test grid and other state variables
+  size_t const nx = 2, ny = nx, nz = nx;
+  size_t const n_cells;
+
+  // Launch Parameters
+  dim3 const dimGrid;   // How many blocks in the grid
+  dim3 const dimBlock;  // How many threads per block
+
+  // Make sure the vector is large enough that the locations where the
+  // magnetic field would be in the real grid are filled
+  std::vector<double> fluxX;
+  std::vector<double> fluxY;
+  std::vector<double> fluxZ;
+  std::vector<double> grid;
+  std::vector<double> testCTElectricFields;
+  std::vector<double> fiducialData;
+
+  // device pointers
+  double *dev_fluxX, *dev_fluxY, *dev_fluxZ, *dev_grid, *dev_testCTElectricFields;
+
+  /*!
+   * \brief Launch the kernel and check results
+   *
+   */
+  void Run_Test()
+  {
+    // Copy values to GPU
+    GPU_Error_Check(cudaMemcpy(dev_fluxX, fluxX.data(), fluxX.size() * sizeof(Real), cudaMemcpyHostToDevice));
+    GPU_Error_Check(cudaMemcpy(dev_fluxY, fluxY.data(), fluxY.size() * sizeof(Real), cudaMemcpyHostToDevice));
+    GPU_Error_Check(cudaMemcpy(dev_fluxZ, fluxZ.data(), fluxZ.size() * sizeof(Real), cudaMemcpyHostToDevice));
+    GPU_Error_Check(cudaMemcpy(dev_grid, grid.data(), grid.size() * sizeof(Real), cudaMemcpyHostToDevice));
+    GPU_Error_Check(cudaMemcpy(dev_testCTElectricFields, testCTElectricFields.data(),
+                               testCTElectricFields.size() * sizeof(Real), cudaMemcpyHostToDevice));
+
+    // Call the kernel to test
+    hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, dimGrid, dimBlock, 0, 0, dev_fluxX, dev_fluxY, dev_fluxZ,
+                       dev_grid, dev_testCTElectricFields, nx, ny, nz, n_cells);
+    GPU_Error_Check();
+
+    // Copy test data back
+    GPU_Error_Check(cudaMemcpy(testCTElectricFields.data(), dev_testCTElectricFields,
+                               testCTElectricFields.size() * sizeof(Real), cudaMemcpyDeviceToHost));
+    cudaDeviceSynchronize();
+
+    // Check the results
+    for (size_t i = 0; i < fiducialData.size(); i++) {
+      int xid, yid, zid;
+      testing_utilities::Check_Results(fiducialData.at(i), testCTElectricFields.at(i),
+                                       "value at i = " + std::to_string(i) + ", xid  = " + std::to_string(xid) +
+                                           ", yid  = " + std::to_string(yid) + ", zid  = " + std::to_string(zid));
+    }
+  }
+};
+// =============================================================================
+
+// =============================================================================
+TEST_F(tMHDCalculateCTElectricFields, PositiveVelocityExpectCorrectOutput)
+{
+  // Fiducial values
+  fiducialData.at(7)  = 60.951467108788492;
+  fiducialData.at(15) = -98.736587665919359;
+  fiducialData.at(23) = 61.768055665002557;
+
+  // Launch kernel and check results
+  Run_Test();
+}
+// =============================================================================
+
+// =============================================================================
+TEST_F(tMHDCalculateCTElectricFields, NegativeVelocityExpectCorrectOutput)
+{
+  // Fiducial values
+  fiducialData.at(7)  = 59.978246483260179;
+  fiducialData.at(15) = -97.279949010457187;
+  fiducialData.at(23) = 61.280813140085613;
+
+  // Set the density fluxes to be negative to indicate a negative velocity
+  // across the face
+  for (size_t i = 0; i < n_cells; i++) {
+    fluxX.at(i) = -fluxX.at(i);
+    fluxY.at(i) = -fluxY.at(i);
+    fluxZ.at(i) = -fluxZ.at(i);
+  }
+
+  // Launch kernel and check results
+  Run_Test();
+}
+// =============================================================================
+
+// =============================================================================
+TEST_F(tMHDCalculateCTElectricFields, ZeroVelocityExpectCorrectOutput)
+{
+  // Fiducial values
+  fiducialData.at(7)  = 60.464856796024335;
+  fiducialData.at(15) = -98.008268338188287;
+  fiducialData.at(23) = 61.524434402544081;
+
+  // Set the density fluxes to be negative to indicate a negative velocity
+  // across the face
+  for (size_t i = 0; i < n_cells; i++) {
+    fluxX.at(i) = 0.0;
+    fluxY.at(i) = 0.0;
+    fluxZ.at(i) = 0.0;
+  }
+
+  // Launch kernel and check results
+  Run_Test();
+}
+// =============================================================================
+
+// =============================================================================
+TEST(tMHDCTSlope, CorrectInputExpectCorrectOutput)
+{
+  // Set up the basic parameters
+  size_t const nx      = 5;
+  size_t const ny      = nx;
+  size_t const nz      = nx;
+  int const xid        = nx / 2;
+  int const yid        = ny / 2;
+  int const zid        = nz / 2;
+  size_t const n_cells = nx * ny * nz;
+
+  // Set up the grid
+  std::vector<double> flux(grid_enum::num_fields * n_cells), conserved(grid_enum::num_fields * n_cells);
+
+  std::mt19937 prng(1);
+  std::uniform_real_distribution<double> doubleRand(-5, 5);
+
+  for (double& conserved_data : conserved) {
+    conserved_data = doubleRand(prng);
+  }
+  for (double& flux_data : flux) {
+    flux_data = doubleRand(prng);
+  }
+
+  // Fiducial data
+  std::vector<double> fiducial_data = {
+      -6.8725060451062561, -77.056763568617669, 1.4564238051915397,  5.4541656143291437,  -0.83503550003671911,
+      -78.091781647940934, -2.6187125848387525, -5.6934594000939542, -16.243259069749971, -59.321631150095314,
+      0.99291378610068892, 4.4004574252725384,  -1.6902722376320516, -63.074645759822637, -4.5776373499662899,
+      -19.476095152639683, -2.0173881091784471, -74.484407919605786, -7.8184484634991724, -0.23206265131850434,
+      0.41622472388590037, -74.479121547383727, -6.9903417764222358, -1.832282425083853};
+
+  // Get test data. Only test the options that will be used
+  std::vector<double> test_data;
+  test_data.emplace_back(
+      mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 0, 2, -1, 1, 2, xid, yid, zid, nx, ny, n_cells));
+  test_data.emplace_back(
+      mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 0, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells));
+  test_data.emplace_back(
+      mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells));
+  test_data.emplace_back(
+      mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 0, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells));
+  test_data.emplace_back(
+      mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 0, 1, -1, 1, 2, xid, yid, zid, nx, ny, n_cells));
+  test_data.emplace_back(
+      mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 0, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells));
+  test_data.emplace_back(
+      mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells));
+  test_data.emplace_back(
+      mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 0, 2, -1, -1, 2, xid, yid, zid, nx, ny, n_cells));
+  test_data.emplace_back(
+      mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 1, 2, -1, 0, 2, xid, yid, zid, nx, ny, n_cells));
+  test_data.emplace_back(
+      mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 1, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells));
+  test_data.emplace_back(
+      mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells));
+  test_data.emplace_back(
+      mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 1, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells));
+  test_data.emplace_back(
+      mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 1, 0, -1, 0, 2, xid, yid, zid, nx, ny, n_cells));
+  test_data.emplace_back(
+      mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 1, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells));
+  test_data.emplace_back(
+      mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells));
+  test_data.emplace_back(
+      mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 1, 2, -1, 2, -1, xid, yid, zid, nx, ny, n_cells));
+  test_data.emplace_back(
+      mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 2, 0, -1, 0, 1, xid, yid, zid, nx, ny, n_cells));
+  test_data.emplace_back(
+      mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 2, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells));
+  test_data.emplace_back(
+      mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells));
+  test_data.emplace_back(
+      mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 2, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells));
+  test_data.emplace_back(
+      mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 2, 1, -1, 0, 1, xid, yid, zid, nx, ny, n_cells));
+  test_data.emplace_back(
+      mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 2, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells));
+  test_data.emplace_back(
+      mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells));
+  test_data.emplace_back(
+      mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 2, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells));
+
+  // Check the results
+  ASSERT_EQ(test_data.size(), fiducial_data.size());
+
+  for (size_t i = 0; i < test_data.size(); i++) {
+    testing_utilities::Check_Results(fiducial_data.at(i), test_data.at(i), "");
+  }
+}
+// =============================================================================
+#endif  // MHD
diff --git a/src/mhd/magnetic_divergence.cu b/src/mhd/magnetic_divergence.cu
new file mode 100644
index 000000000..f49e04218
--- /dev/null
+++ b/src/mhd/magnetic_divergence.cu
@@ -0,0 +1,126 @@
+/*!
+ * \file mhd_utilities.cpp
+ * \author Robert 'Bob' Caddy (rvc@pitt.edu)
+ * \brief Contains the implementation of various utility functions for MHD and
+ * for the various kernels, functions, and tools required for the 3D VL+CT MHD
+ * integrator. Due to the CUDA/HIP compiler requiring that device functions be
+ * directly accessible to the file they're used in most device functions will be
+ * implemented in the header file. Uses the same method described in Stone et
+ * al. 2008 "ATHENA: A new code for astrophysical MHD", hereafter referred to as
+ * Stone et al. 2008
+ *
+ */
+
+// STL Includes
+#include <cfloat>
+#include <limits>
+
+// External Includes
+
+// Local Includes
+#include "../grid/grid3D.h"
+#include "../io/io.h"
+#include "../mhd/magnetic_divergence.h"
+#include "../utils/DeviceVector.h"
+#include "../utils/cuda_utilities.h"
+#include "../utils/error_handling.h"
+#include "../utils/reduction_utilities.h"
+#ifdef MHD
+
+namespace mhd
+{
+// =========================================================================
+__global__ void calculateMagneticDivergence(Real const *dev_conserved, Real *dev_maxDivergence, Real const dx,
+                                            Real const dy, Real const dz, int const nx, int const ny, int const nz,
+                                            int const n_cells)
+{
+  // Variables to store the divergence
+  Real cellDivergence;
+  Real maxDivergence = 0.0;
+
+  // Index variables
+  int xid, yid, zid, id_xMin1, id_yMin1, id_zMin1;
+
+  // Grid stride loop to perform as much of the reduction as possible
+  for (size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) {
+    // compute the real indices
+    cuda_utilities::compute3DIndices(id, nx, ny, xid, yid, zid);
+
+    // Thread guard to avoid overrun and to skip ghost cells that cannot
+    // have their divergences computed due to a missing face;
+    if (xid > 1 and yid > 1 and zid > 1 and xid < nx and yid < ny and zid < nz) {
+      // Compute the various offset indices
+      id_xMin1 = cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny);
+      id_yMin1 = cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny);
+      id_zMin1 = cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny);
+
+      // Compute divergence
+      // Stone et al. 2008 equation 25
+      cellDivergence = ((dev_conserved[id + (grid_enum::magnetic_x)*n_cells] -
+                         dev_conserved[id_xMin1 + (grid_enum::magnetic_x)*n_cells]) /
+                        dx) +
+                       ((dev_conserved[id + (grid_enum::magnetic_y)*n_cells] -
+                         dev_conserved[id_yMin1 + (grid_enum::magnetic_y)*n_cells]) /
+                        dy) +
+                       ((dev_conserved[id + (grid_enum::magnetic_z)*n_cells] -
+                         dev_conserved[id_zMin1 + (grid_enum::magnetic_z)*n_cells]) /
+                        dz);
+
+      maxDivergence = max(maxDivergence, fabs(cellDivergence));
+    }
+  }
+
+  // Perform reduction across the entire grid
+  reduction_utilities::gridReduceMax(maxDivergence, dev_maxDivergence);
+}
+// =========================================================================
+
+// =============================================================================
+Real checkMagneticDivergence(Grid3D const &G)
+{
+  // Compute the local value of the divergence
+  // First let's create some variables we'll need.
+  cuda_utilities::AutomaticLaunchParams static const launchParams(mhd::calculateMagneticDivergence);
+  cuda_utilities::DeviceVector<Real> static dev_maxDivergence(1);
+
+  // Set the device side divergence to the smallest possible double so that
+  // the reduction isn't using the maximum value of the previous iteration
+  dev_maxDivergence.assign(std::numeric_limits<Real>::lowest());
+
+  // Now lets get the local maximum divergence
+  hipLaunchKernelGGL(mhd::calculateMagneticDivergence, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0,
+                     G.C.device, dev_maxDivergence.data(), G.H.dx, G.H.dy, G.H.dz, G.H.nx, G.H.ny, G.H.nz, G.H.n_cells);
+  GPU_Error_Check();
+  Real max_magnetic_divergence = dev_maxDivergence[0];
+
+  #ifdef MPI_CHOLLA
+  // Now that we have the local maximum let's get the global maximum
+  max_magnetic_divergence = ReduceRealMax(max_magnetic_divergence);
+  #endif  // MPI_CHOLLA
+
+  // If the magnetic divergence is greater than the limit then raise a warning and exit.
+  // This maximum value of divergence was chosen after a discussion with Chris White of the Flatiron institute and an
+  // Athena dev. He said that in his experience issues start showing up at around 1E-8 divergence so this is set with an
+  // order of magnitude margin.
+  Real static const magnetic_divergence_limit = 1.0E-9;
+  if (max_magnetic_divergence > magnetic_divergence_limit) {
+    // Report the error and exit
+    chprintf(
+        "The magnetic divergence has exceeded the maximum allowed value. "
+        "Divergence = %7.4e, the maximum allowed divergence = %7.4e\n",
+        max_magnetic_divergence, magnetic_divergence_limit);
+    chexit(-1);
+  } else if (max_magnetic_divergence < 0.0) {
+    // Report the error and exit
+    chprintf("The magnetic divergence is negative. Divergence = %7.4e\n", max_magnetic_divergence);
+    chexit(-1);
+  } else  // The magnetic divergence is within acceptable bounds
+  {
+    chprintf("Global maximum magnetic divergence = %7.4e\n", max_magnetic_divergence);
+  }
+
+  return max_magnetic_divergence;
+}
+// =============================================================================
+}  // end namespace mhd
+#endif  // MHD
diff --git a/src/mhd/magnetic_divergence.h b/src/mhd/magnetic_divergence.h
new file mode 100644
index 000000000..3833692c6
--- /dev/null
+++ b/src/mhd/magnetic_divergence.h
@@ -0,0 +1,63 @@
+/*!
+ * \file magnetic_divergence.h
+ * \author Robert 'Bob' Caddy (rvc@pitt.edu)
+ * \brief Contains the declaration for the functions that compute the magnetic
+ * divergence
+ *
+ */
+
+#pragma once
+
+// STL Includes
+
+// External Includes
+
+// Local Includes
+#include "../global/global.h"
+#include "../global/global_cuda.h"
+#include "../grid/grid3D.h"
+#include "../utils/gpu.hpp"
+
+/*!
+ * \brief Namespace for MHD code
+ *
+ */
+namespace mhd
+{
+// =========================================================================
+/*!
+ * \brief Kernel to compute the maximum divergence of the magnetic field in
+ * the grid. Uses `reduction_utilities::gridReduceMax` and as such should be
+ * called with the minimum number of blocks. Recommend using the occupancy
+ * API
+ *
+ * \param[in] dev_conserved The device array of conserved variables
+ * \param[out] maxDivergence The device scalar to store the reduced divergence at
+ * \param[in] dx Cell size in the X-direction
+ * \param[in] dy Cell size in the Y-direction
+ * \param[in] dz Cell size in the Z-direction
+ * \param[in] nx Number of cells in the X-direction
+ * \param[in] ny Number of cells in the Y-direction
+ * \param[in] nz Number of cells in the Z-direction
+ * \param[in] n_cells Total number of cells
+ */
+__global__ void calculateMagneticDivergence(Real const *dev_conserved, Real *maxDivergence, Real const dx,
+                                            Real const dy, Real const dz, int const nx, int const ny, int const nz,
+                                            int const n_cells);
+// =========================================================================
+
+// =========================================================================
+/*!
+ * \brief Compute the maximum magnetic divergence in the grid and report
+ * an error if it exceeds the magnetic divergence limit or is negative. The
+ * magnetic divergence limit is 1E-14 as determined by Athena as a
+ * reasonable upper bound for correctness.
+ *
+ * \param G The grid object
+ * \return Real The maximum magnetic divergence found in the grid. Can
+ * usually be ignored since all checking is done in the fucntion, mostly
+ * this return is for testing.
+ */
+Real checkMagneticDivergence(Grid3D const &G);
+// =========================================================================
+}  // end namespace mhd
\ No newline at end of file
diff --git a/src/mhd/magnetic_divergence_tests.cu b/src/mhd/magnetic_divergence_tests.cu
new file mode 100644
index 000000000..7d7b35294
--- /dev/null
+++ b/src/mhd/magnetic_divergence_tests.cu
@@ -0,0 +1,69 @@
+/*!
+ * \file magnetic_divergence_tests.cu
+ * \author Robert 'Bob' Caddy (rvc@pitt.edu)
+ * \brief Tests for the magnetic divergence code
+ *
+ */
+
+// STL Includes
+#include <iostream>
+#include <random>
+#include <string>
+#include <vector>
+
+// External Includes
+#include <gtest/gtest.h>  // Include GoogleTest and related libraries/headers
+
+// Local Includes
+#include "../global/global.h"
+#include "../mhd/magnetic_divergence.h"
+#include "../utils/DeviceVector.h"
+#include "../utils/testing_utilities.h"
+
+#ifdef MHD
+// =============================================================================
+// Tests for the magnetic field divergence functions
+// =============================================================================
+TEST(tMHDGrid3DcheckMagneticDivergence, CorrectInputExpectCorrectOutput)
+{
+  // Grid Parameters & testing parameters
+  size_t const gridSize = 96;  // Needs to be at least 64 so that each thread has a value
+  size_t const n_ghost  = 4;
+
+  // Instantiate Grid3D object
+  Grid3D G;
+  G.H.dx       = 3;
+  G.H.dy       = G.H.dx;
+  G.H.dz       = G.H.dx;
+  G.H.nx       = gridSize + 2 * n_ghost;
+  G.H.ny       = G.H.nx;
+  G.H.nz       = G.H.nx;
+  G.H.n_cells  = G.H.nx * G.H.ny * G.H.nz;
+  G.H.n_fields = 8;
+
+  // Setup host grid. Fill host grid with random values and randomly assign
+  // maximum value
+  std::vector<Real> host_grid(G.H.n_cells * G.H.n_fields);
+  std::mt19937 prng(1);
+  std::uniform_real_distribution<double> doubleRand(1, 5);
+  for (double& host_data : host_grid) {
+    host_data = doubleRand(prng) / 1E15;
+  }
+
+  // Allocating and copying to device
+  cuda_utilities::DeviceVector<double> dev_grid(host_grid.size());
+  G.C.device = dev_grid.data();
+  dev_grid.cpyHostToDevice(host_grid);
+
+  // Perform test
+  InitializeChollaMPI(NULL, NULL);
+  double max_magnetic_divergence = mhd::checkMagneticDivergence(G);
+  MPI_Finalize();
+  // Perform Comparison
+  Real const fiducialDivergence = 3.6318132783263106 / 1E15;
+  testing_utilities::Check_Results(fiducialDivergence, max_magnetic_divergence, "maximum divergence");
+}
+// =============================================================================
+// End of tests for the magnetic field divergence functions
+// =============================================================================
+#endif  // MHD
diff --git a/src/mhd/magnetic_update.cu b/src/mhd/magnetic_update.cu
new file mode 100644
index 000000000..acfd44982
--- /dev/null
+++ b/src/mhd/magnetic_update.cu
@@ -0,0 +1,84 @@
+/*!
+ * \file magnetic_update.cu
+ * \author Robert 'Bob' Caddy (rvc@pitt.edu)
+ * \brief Contains the definition of the kernel to update the magnetic field.
+ * Method from Stone & Gardiner 2009 "A simple unsplit Godunov method for
+ * multidimensional MHD" hereafter referred to as "S&G 2009"
+ *
+ */
+
+// STL Includes
+
+// External Includes
+
+// Local Includes
+#include "../mhd/magnetic_update.h"
+#include "../utils/cuda_utilities.h"
+#ifdef MHD
+namespace mhd
+{
+// =========================================================================
+__global__ void Update_Magnetic_Field_3D(Real *sourceGrid, Real *destinationGrid, Real *ctElectricFields, int const nx,
+                                         int const ny, int const nz, int const n_cells, Real const dt, Real const dx,
+                                         Real const dy, Real const dz)
+{
+  // get a thread index
+  int const blockId  = blockIdx.x + blockIdx.y * gridDim.x;
+  int const threadId = threadIdx.x + blockId * blockDim.x;
+  int xid, yid, zid;
+  cuda_utilities::compute3DIndices(threadId, nx, ny, xid, yid, zid);
+
+  // Thread guard to avoid overrun and to skip ghost cells that cannot be
+  // evolved due to missing electric fields that can't be reconstructed
+  if (xid > 0 and yid > 0 and zid > 0 and xid < nx - 1 and yid < ny - 1 and zid < nz - 1) {
+    // Compute the three dt/dx quantities
+    Real const dtodx = dt / dx;
+    Real const dtody = dt / dy;
+    Real const dtodz = dt / dz;
+
+    // Load the various edge electric fields required. The '1' and '2'
+    // fields are not shared and the '3' fields are shared by two of the
+    // updates
+    Real electric_x_1 =
+        ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid + 1, zid, nx, ny)) + grid_enum::ct_elec_x * n_cells];
+    Real electric_x_2 =
+        ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid, zid + 1, nx, ny)) + grid_enum::ct_elec_x * n_cells];
+    Real electric_x_3 = ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid + 1, zid + 1, nx, ny)) +
+                                         grid_enum::ct_elec_x * n_cells];
+    Real electric_y_1 =
+        ctElectricFields[(cuda_utilities::compute1DIndex(xid + 1, yid, zid, nx, ny)) + grid_enum::ct_elec_y * n_cells];
+    Real electric_y_2 =
+        ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid, zid + 1, nx, ny)) + grid_enum::ct_elec_y * n_cells];
+    Real electric_y_3 = ctElectricFields[(cuda_utilities::compute1DIndex(xid + 1, yid, zid + 1, nx, ny)) +
+                                         grid_enum::ct_elec_y * n_cells];
+    Real electric_z_1 =
+        ctElectricFields[(cuda_utilities::compute1DIndex(xid + 1, yid, zid, nx, ny)) + grid_enum::ct_elec_z * n_cells];
+    Real electric_z_2 =
+        ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid + 1, zid, nx, ny)) + grid_enum::ct_elec_z * n_cells];
+    Real electric_z_3 = ctElectricFields[(cuda_utilities::compute1DIndex(xid + 1, yid + 1, zid, nx, ny)) +
+                                         grid_enum::ct_elec_z * n_cells];
+
+    // Perform Updates
+
+    // X field update
+    // S&G 2009 equation 10
+    destinationGrid[threadId + grid_enum::magnetic_x * n_cells] =
+        sourceGrid[threadId + grid_enum::magnetic_x * n_cells] + dtodz * (electric_y_3 - electric_y_1) +
+        dtody * (electric_z_1 - electric_z_3);
+
+    // Y field update
+    // S&G 2009 equation 11
+    destinationGrid[threadId + grid_enum::magnetic_y * n_cells] =
+        sourceGrid[threadId + grid_enum::magnetic_y * n_cells] + dtodx * (electric_z_3 - electric_z_2) +
+        dtodz * (electric_x_1 - electric_x_3);
+
+    // Z field update
+    // S&G 2009 equation 12
+    destinationGrid[threadId + grid_enum::magnetic_z * n_cells] =
+        sourceGrid[threadId + grid_enum::magnetic_z * n_cells] + dtody * (electric_x_3 - electric_x_2) +
+        dtodx * (electric_y_2 - electric_y_3);
+  }
+}
+// =========================================================================
+}  // end namespace mhd
+#endif  // MHD
diff --git a/src/mhd/magnetic_update.h b/src/mhd/magnetic_update.h
new file mode 100644
index 000000000..2601abdb7
--- /dev/null
+++ b/src/mhd/magnetic_update.h
@@ -0,0 +1,51 @@
+/*!
+ * \file magnetic_update.h
+ * \author Robert 'Bob' Caddy (rvc@pitt.edu)
+ * \brief Contains the declaration of the kernel to update the magnetic field.
+ * Method from Stone & Gardiner 2009 "A simple unsplit Godunov method for
+ * multidimensional MHD" hereafter referred to as "S&G 2009"
+ *
+ */
+
+#pragma once
+
+// STL Includes
+
+// External Includes
+
+// Local Includes
+#include "../global/global.h"
+#include "../global/global_cuda.h"
+#include "../utils/gpu.hpp"
+
+/*!
+ * \brief Namespace for MHD code
+ *
+ */
+namespace mhd
+{
+// =========================================================================
+/*!
+ * \brief Update the magnetic field using the CT electric fields
+ *
+ * \param[in] sourceGrid The array which holds the old values of the
+ * magnetic field
+ * \param[out] destinationGrid The array to hold the updated values of the
+ * magnetic field
+ * \param[in] ctElectricFields The array of constrained transport electric
+ * fields
+ * \param[in] nx The number of cells in the x-direction
+ * \param[in] ny The number of cells in the y-direction
+ * \param[in] nz The number of cells in the z-direction
+ * \param[in] n_cells The total number of cells
+ * \param[in] dt The time step. If doing the half time step update make sure
+ * to divide it by two when passing the time step to this kernel
+ * \param[in] dx The size of each cell in the x-direction
+ * \param[in] dy The size of each cell in the y-direction
+ * \param[in] dz The size of each cell in the z-direction
+ */
+__global__ void Update_Magnetic_Field_3D(Real *sourceGrid, Real *destinationGrid, Real *ctElectricFields, int const nx,
+                                         int const ny, int const nz, int const n_cells, Real const dt, Real const dx,
+                                         Real const dy, Real const dz);
+// =========================================================================
+}  // end namespace mhd
\ No newline at end of file
diff --git a/src/mhd/magnetic_update_tests.cu b/src/mhd/magnetic_update_tests.cu
new file mode 100644
index 000000000..7cfb8757c
--- /dev/null
+++ b/src/mhd/magnetic_update_tests.cu
@@ -0,0 +1,127 @@
+/*!
+ * \file magnetic_update_tests.cu
+ * \author Robert 'Bob' Caddy (rvc@pitt.edu)
+ * \brief Tests for the magnetic update code
+ *
+ */
+
+// STL Includes
+#include <iostream>
+#include <numeric>
+#include <string>
+#include <vector>
+
+// External Includes
+#include <gtest/gtest.h>  // Include GoogleTest and related libraries/headers
+
+// Local Includes
+#include "../mhd/magnetic_update.h"
+#include "../utils/cuda_utilities.h"
+#include "../utils/testing_utilities.h"
+
+#ifdef MHD
+// =============================================================================
+/*!
+ * \brief Test fixture for tMHDUpdateMagneticField3D test suite
+ *
+ */
+// NOLINTNEXTLINE(readability-identifier-naming)
+class tMHDUpdateMagneticField3D : public ::testing::Test
+{
+ public:
+  /*!
+   * \brief Initialize and allocate all the various required variables and
+   * arrays
+   *
+   */
+  tMHDUpdateMagneticField3D()
+      : n_cells(nx * ny * nz),
+        sourceGrid(n_cells * (grid_enum::num_fields)),
+        destinationGrid(n_cells * (grid_enum::num_fields), -999.),
+        ctElectricFields(n_cells * 3),
+        fiducialData(n_cells * (grid_enum::num_fields), -999.),
+        dimGrid((n_cells + TPB - 1) / TPB, 1, 1),
+        dimBlock(TPB, 1, 1)
+  {
+    // Allocate device arrays
+    GPU_Error_Check(cudaMalloc(&dev_sourceGrid, sourceGrid.size() * sizeof(double)));
+    GPU_Error_Check(cudaMalloc(&dev_destinationGrid, destinationGrid.size() * sizeof(double)));
+    GPU_Error_Check(cudaMalloc(&dev_ctElectricFields, ctElectricFields.size() * sizeof(double)));
+
+    // Populate the grids with values where vector.at(i) = double(i). The
+    // values chosen aren't that important, just that every cell has a unique
+    // value
+    std::iota(std::begin(sourceGrid), std::end(sourceGrid), 0.);
+    std::iota(std::begin(ctElectricFields), std::end(ctElectricFields), sourceGrid.back() + 1);
+  }
+  ~tMHDUpdateMagneticField3D() = default;
+
+ protected:
+  // Initialize the test grid and other state variables
+  size_t const nx = 3, ny = nx, nz = nx;
+  size_t const n_cells;
+  Real const dt = 3.2, dx = 2.5, dy = dx, dz = dx;
+
+  // Launch Parameters
+  dim3 const dimGrid;   // How many blocks in the grid
+  dim3 const dimBlock;  // How many threads per block
+
+  // Make sure the vector is large enough that the locations where the
+  // magnetic field would be in the real grid are filled
+  std::vector<double> sourceGrid;
+  std::vector<double> destinationGrid;
+  std::vector<double> ctElectricFields;
+  std::vector<double> fiducialData;
+
+  // device pointers
+  double *dev_sourceGrid, *dev_destinationGrid, *dev_ctElectricFields, *dev_fiducialData;
+
+  /*!
+   * \brief Launch the kernel and check results
+   *
+   */
+  void Run_Test()
+  {
+    // Copy values to GPU
+    GPU_Error_Check(
+        cudaMemcpy(dev_sourceGrid, sourceGrid.data(), sourceGrid.size() * sizeof(Real), cudaMemcpyHostToDevice));
+    GPU_Error_Check(cudaMemcpy(dev_destinationGrid, destinationGrid.data(), destinationGrid.size() * sizeof(Real),
+                               cudaMemcpyHostToDevice));
+    GPU_Error_Check(cudaMemcpy(dev_ctElectricFields, ctElectricFields.data(), ctElectricFields.size() * sizeof(Real),
+                               cudaMemcpyHostToDevice));
+
+    // Call the kernel to test
+    hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dimGrid, dimBlock, 0, 0, dev_sourceGrid, dev_destinationGrid,
+                       dev_ctElectricFields, nx, ny, nz, n_cells, dt, dx, dy, dz);
+    GPU_Error_Check();
+
+    // Copy test data back
+    GPU_Error_Check(cudaMemcpy(destinationGrid.data(), dev_destinationGrid, destinationGrid.size() * sizeof(Real),
+                               cudaMemcpyDeviceToHost));
+    cudaDeviceSynchronize();
+
+    // Check the results
+    for (size_t i = 0; i < fiducialData.size(); i++) {
+      int xid, yid, zid;
+      cuda_utilities::compute3DIndices(i, nx, ny, xid, yid, zid);
+      testing_utilities::Check_Results(fiducialData.at(i), destinationGrid.at(i),
+                                       "value at i = " + std::to_string(i) + ", xid  = " + std::to_string(xid) +
+                                           ", yid  = " + std::to_string(yid) + ", zid  = " + std::to_string(zid));
+    }
+  }
+};
+// =============================================================================
+
+// =============================================================================
+TEST_F(tMHDUpdateMagneticField3D, CorrectInputExpectCorrectOutput)
+{
+  // Fiducial values
+  fiducialData.at(148) = 155.68000000000001;
+  fiducialData.at(175) = 164.75999999999999;
+  fiducialData.at(202) = 204.56;
+
+  // Launch kernel and check results
+  Run_Test();
+}
+// =============================================================================
+#endif  // MHD
diff --git a/src/model/disk_ICs.cpp b/src/model/disk_ICs.cpp
index f6ada7002..055827001 100644
--- a/src/model/disk_ICs.cpp
+++ b/src/model/disk_ICs.cpp
@@ -1,300 +1,296 @@
 /*! \file disk_ICs.cpp
  *  \brief Definitions of initial conditions for hydrostatic disks.
-           Note that the grid is mapped to 1D as i + (x_dim)*j + (x_dim*y_dim)*k. */
+           Note that the grid is mapped to 1D as i + (x_dim)*j +
+ (x_dim*y_dim)*k. */
 
-#include <stdlib.h>
-#include <stdio.h>
 #include <math.h>
-#include <cmath>
+#include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 #include <time.h>
+
+#include <cmath>
+
 #include "../global/global.h"
 #include "../grid/grid3D.h"
-#include "../mpi/mpi_routines.h"
 #include "../io/io.h"
+#include "../mpi/mpi_routines.h"
 #include "../utils/error_handling.h"
-
+#include "disk_galaxy.h"
 
 // #define DISK_ICS
 
 // function with logarithms used in NFW definitions
-Real log_func(Real y)
-{
-  return log(1+y) - y/(1+y);
-}
+Real Log_Func(Real y) { return log(1 + y) - y / (1 + y); }
 
-//vertical acceleration in NFW halo
-Real gz_halo_D3D(Real R, Real z, Real *hdp)
+// vertical acceleration in NFW halo
+Real Gz_Halo_D3D(Real R, Real z, Real *hdp)
 {
-  Real M_h = hdp[2]; //halo mass
-  Real R_h = hdp[5]; //halo scale length
-  Real c_vir = hdp[4]; //halo concentration parameter
-  Real r = sqrt(R*R + z*z); //spherical radius
-  Real x = r / R_h;
-  Real z_comp = z/r;
-
-  Real A = log_func(x);
-  Real B = 1.0 / (r*r);
-  Real C = GN*M_h/log_func(c_vir);
-
-  //checked with wolfram alpha
-  return -C*A*B*z_comp;
+  Real M_h    = hdp[2];               // halo mass
+  Real R_h    = hdp[5];               // halo scale length
+  Real c_vir  = hdp[4];               // halo concentration parameter
+  Real r      = sqrt(R * R + z * z);  // spherical radius
+  Real x      = r / R_h;
+  Real z_comp = z / r;
+
+  Real A = Log_Func(x);
+  Real B = 1.0 / (r * r);
+  Real C = GN * M_h / Log_Func(c_vir);
+
+  // checked with wolfram alpha
+  return -C * A * B * z_comp;
 }
 
-
-//radial acceleration in NFW halo
-Real gr_halo_D3D(Real R, Real z, Real *hdp)
+// radial acceleration in NFW halo
+Real Gr_Halo_D3D(Real R, Real z, Real *hdp)
 {
-  Real M_h = hdp[2]; //halo mass
-  Real R_h = hdp[5]; //halo scale length
-  Real c_vir = hdp[4]; //halo concentration parameter
-  Real r = sqrt(R*R + z*z); //spherical radius
-  Real x = r / R_h;
-  Real r_comp = R/r;
-
-  Real A = log_func(x);
-  Real B = 1.0 / (r*r);
-  Real C = GN*M_h/log_func(c_vir);
-
-  //checked with wolfram alpha
-  return -C*A*B*r_comp;
+  Real M_h    = hdp[2];               // halo mass
+  Real R_h    = hdp[5];               // halo scale length
+  Real c_vir  = hdp[4];               // halo concentration parameter
+  Real r      = sqrt(R * R + z * z);  // spherical radius
+  Real x      = r / R_h;
+  Real r_comp = R / r;
+
+  Real A = Log_Func(x);
+  Real B = 1.0 / (r * r);
+  Real C = GN * M_h / Log_Func(c_vir);
+
+  // checked with wolfram alpha
+  return -C * A * B * r_comp;
 }
 
-//disk radial surface density profile
-Real Sigma_disk_D3D(Real r, Real *hdp)
+// disk radial surface density profile
+Real Sigma_Disk_D3D(Real r, Real *hdp)
 {
-  //return the exponential surface density
+  // return the exponential surface density
   Real Sigma_0 = hdp[9];
   Real R_g     = hdp[10];
-  Real R_c = 4.5;
+  Real R_c     = 4.5;
   Real Sigma;
   Real delta = 0.1;
-  Real norm = log(1.0/3.0);
-  Sigma = Sigma_0 * exp(-r/R_g);
+  Real norm  = log(1.0 / 3.0);
+  Sigma      = Sigma_0 * exp(-r / R_g);
   // taper the edge of the disk to 0
   if (r < R_c) {
-    Sigma *= 2.0 - 1.0 / (1.0 - exp((r - (4.5 - delta*norm))/delta));
-  }
-  else {
-    Sigma *= 1.0 / (1.0 - exp(((4.5 + delta*norm) - r)/delta)) - 1.0;
+    Sigma *= 2.0 - 1.0 / (1.0 - exp((r - (4.5 - delta * norm)) / delta));
+  } else {
+    Sigma *= 1.0 / (1.0 - exp(((4.5 + delta * norm) - r) / delta)) - 1.0;
   }
   return Sigma;
 }
 
-//vertical acceleration in miyamoto nagai
-Real gz_disk_D3D(Real R, Real z, Real *hdp)
+// vertical acceleration in miyamoto nagai
+Real Gz_Disk_D3D(Real R, Real z, Real *hdp)
 {
-  Real M_d = hdp[1]; //disk mass
-  Real R_d = hdp[6]; //MN disk length
-  Real Z_d = hdp[7]; //MN disk height
-  Real a = R_d;
-  Real b = Z_d;
-  Real A = sqrt(b*b + z*z);
-  Real B = a + A;
-  Real C = pow(B*B + R*R, 1.5);
-
-  //checked with wolfram alpha
-  return -GN*M_d*z*B/(A*C);
+  Real M_d = hdp[1];  // disk mass
+  Real R_d = hdp[6];  // MN disk length
+  Real Z_d = hdp[7];  // MN disk height
+  Real a   = R_d;
+  Real b   = Z_d;
+  Real A   = sqrt(b * b + z * z);
+  Real B   = a + A;
+  Real C   = pow(B * B + R * R, 1.5);
+
+  // checked with wolfram alpha
+  return -GN * M_d * z * B / (A * C);
 }
 
-//radial acceleration in miyamoto nagai
-Real gr_disk_D3D(Real R, Real z, Real *hdp)
+// radial acceleration in miyamoto nagai
+Real Gr_Disk_D3D(Real R, Real z, Real *hdp)
 {
-  Real M_d = hdp[1]; //disk mass
-  Real R_d = hdp[6]; //MN disk length
-  Real Z_d = hdp[7]; //MN disk height
-  Real A = sqrt(Z_d*Z_d + z*z);
-  Real B = R_d + A;
-  Real C = pow(B*B + R*R, 1.5);
-
-  //checked with wolfram alpha
-  return -GN*M_d*R/C;
+  Real M_d = hdp[1];  // disk mass
+  Real R_d = hdp[6];  // MN disk length
+  Real Z_d = hdp[7];  // MN disk height
+  Real A   = sqrt(Z_d * Z_d + z * z);
+  Real B   = R_d + A;
+  Real C   = pow(B * B + R * R, 1.5);
+
+  // checked with wolfram alpha
+  return -GN * M_d * R / C;
 }
 
-
-//NFW halo potential
-Real phi_halo_D3D(Real R, Real z, Real *hdp)
+// NFW halo potential
+Real Phi_Halo_D3D(Real R, Real z, Real *hdp)
 {
-  Real M_h = hdp[2]; //halo mass
-  Real R_h = hdp[5]; //halo scale length
-  Real c_vir = hdp[4]; //halo concentration parameter
-  Real r = sqrt(R*R + z*z); //spherical radius
-  Real x = r / R_h;
+  Real M_h   = hdp[2];               // halo mass
+  Real R_h   = hdp[5];               // halo scale length
+  Real c_vir = hdp[4];               // halo concentration parameter
+  Real r     = sqrt(R * R + z * z);  // spherical radius
+  Real x     = r / R_h;
 
-  Real C = GN*M_h/(R_h*log_func(c_vir));
+  Real C = GN * M_h / (R_h * Log_Func(c_vir));
 
-  //limit x to non-zero value
-  if(x<1.0e-9)
+  // limit x to non-zero value
+  if (x < 1.0e-9) {
     x = 1.0e-9;
+  }
 
-  //checked with wolfram alpha
-  return -C*log(1+x)/x;
+  // checked with wolfram alpha
+  return -C * log(1 + x) / x;
 }
 
-//Miyamoto-Nagai potential
-Real phi_disk_D3D(Real R, Real z, Real *hdp)
+// Miyamoto-Nagai potential
+Real Phi_Disk_D3D(Real R, Real z, Real *hdp)
 {
-  Real M_d = hdp[1]; //disk mass
-  Real R_d = hdp[6]; //MN disk length
-  Real Z_d = hdp[7]; //MN disk height
-  Real A = sqrt(z*z + Z_d*Z_d);
-  Real B = R_d + A;
-  Real C = sqrt(R*R + B*B);
-
-  //patel et al. 2017, eqn 2
-  return -GN*M_d/C;
+  Real M_d = hdp[1];  // disk mass
+  Real R_d = hdp[6];  // MN disk length
+  Real Z_d = hdp[7];  // MN disk height
+  Real A   = sqrt(z * z + Z_d * Z_d);
+  Real B   = R_d + A;
+  Real C   = sqrt(R * R + B * B);
+
+  // patel et al. 2017, eqn 2
+  return -GN * M_d / C;
 }
 
-//total potential
-Real phi_total_D3D(Real R, Real z, Real *hdp)
+// total potential
+Real Phi_Total_D3D(Real R, Real z, Real *hdp)
 {
-  Real Phi_A = phi_halo_D3D(R,z,hdp);
-  Real Phi_B = phi_disk_D3D(R,z,hdp);
+  Real Phi_A = Phi_Halo_D3D(R, z, hdp);
+  Real Phi_B = Phi_Disk_D3D(R, z, hdp);
   return Phi_A + Phi_B;
 }
 
-Real phi_hot_halo_D3D(Real r, Real *hdp)
+Real Phi_Hot_Halo_D3D(Real r, Real *hdp)
 {
-  Real Phi_A = phi_halo_D3D(0,r,hdp);
-  Real Phi_B = phi_disk_D3D(0,r,hdp);
-  //return Phi_A;
+  Real Phi_A = Phi_Halo_D3D(0, r, hdp);
+  Real Phi_B = Phi_Disk_D3D(0, r, hdp);
+  // return Phi_A;
   return Phi_A + Phi_B;
 }
 
-
-//returns the cell-centered vertical
-//location of the cell with index k
-//k is indexed at 0 at the lowest ghost cell
-Real z_hc_D3D(int k, Real dz, int nz, int ng)
+// returns the cell-centered vertical
+// location of the cell with index k
+// k is indexed at 0 at the lowest ghost cell
+Real Z_Hc_D3D(int k, Real dz, int nz, int ng)
 {
-  //checked that this works, such that the
-  //if dz = L_z/nz for the real domain, then the z positions
-  //are set correctly for cell centers with nz spanning
-  //the real domain, and nz + 2*ng spanning the real + ghost domains
-  if(!(nz%2))
-  {
-    //even # of cells
-    return 0.5*dz + ((Real) (k-ng-nz/2))*dz;
-  }else{
-    //odd # of cells
-    return ((Real) (k-ng-(nz-1)/2))*dz;
+  // checked that this works, such that the
+  // if dz = L_z/nz for the real domain, then the z positions
+  // are set correctly for cell centers with nz spanning
+  // the real domain, and nz + 2*ng spanning the real + ghost domains
+  if (!(nz % 2)) {
+    // even # of cells
+    return 0.5 * dz + ((Real)(k - ng - (int)(nz / 2))) * dz;
+  } else {
+    // odd # of cells
+    return ((Real)(k - ng - (int)((nz - 1) / 2))) * dz;
   }
 }
 
-//returns the cell-centered radial
-//location of the cell with index i
-Real r_hc_D3D(int i, Real dr)
+// returns the cell-centered radial
+// location of the cell with index i
+Real R_Hc_D3D(int i, Real dr)
 {
-  //the zeroth cell is centered at 0.5*dr
-  return 0.5*dr + ((Real) i)*dr;
+  // the zeroth cell is centered at 0.5*dr
+  return 0.5 * dr + ((Real)i) * dr;
 }
 
-
-
-/*! \fn void hydrostatic_ray_analytical_D3D(Real *rho, Real *r, Real *hdp, Real dr, int nr)
- *  \brief Calculate the density at spherical radius r due to a hydrostatic halo. Uses an analytic
-    expression normalized by the value of the potential at the cooling radius. */
-void hydrostatic_ray_analytical_D3D(Real *rho, Real *r, Real *hdp, Real dr, int nr)
+/*! \fn void hydrostatic_ray_analytical_D3D(Real *rho, Real *r, Real *hdp, Real
+ dr, int nr)
+ *  \brief Calculate the density at spherical radius r due to a hydrostatic
+ halo. Uses an analytic expression normalized by the value of the potential at
+ the cooling radius. */
+void Hydrostatic_Ray_Analytical_D3D(Real *rho, Real *r, Real *hdp, Real dr, int nr)
 {
-  //Routine to determine the hydrostatic density profile
-  //along a ray from the galaxy center
-  int i;        //index along r direction
+  // Routine to determine the hydrostatic density profile
+  // along a ray from the galaxy center
+  int i;  // index along r direction
 
-  Real gamma   = hdp[13]; //adiabatic index
-  Real rho_eos = hdp[18]; //density where K_EOS is set
-  Real cs      = hdp[19]; //sound speed at rho_eos
-  Real r_cool  = hdp[20]; //cooling radius
+  Real gamma   = hdp[13];  // adiabatic index
+  Real rho_eos = hdp[18];  // density where K_EOS is set
+  Real cs      = hdp[19];  // sound speed at rho_eos
+  Real r_cool  = hdp[20];  // cooling radius
 
-  Real Phi_0; //potential at cooling radius
+  Real Phi_0;  // potential at cooling radius
 
-  Real D_rho; //ratio of density at mid plane and rho_eos
+  Real D_rho;  // ratio of density at mid plane and rho_eos
 
-  Real gmo = gamma - 1.0; //gamma-1
+  Real gmo = gamma - 1.0;  // gamma-1
 
-  //compute the potential at the cooling radius
-  Phi_0 = phi_hot_halo_D3D(r_cool,hdp);
+  // compute the potential at the cooling radius
+  Phi_0 = Phi_Hot_Halo_D3D(r_cool, hdp);
 
-  //We are normalizing to the central density
-  //so D_rho == 1
+  // We are normalizing to the central density
+  // so D_rho == 1
   D_rho = 1.0;
 
-  //store densities
-  for(i=0;i<nr;i++)
-  {
-    r[i] = r_hc_D3D(i,dr);
-    rho[i] = rho_eos*pow(D_rho - gmo*(phi_hot_halo_D3D(r[i],hdp)-Phi_0)/(cs*cs),1./gmo);
+  // store densities
+  for (i = 0; i < nr; i++) {
+    r[i]   = R_Hc_D3D(i, dr);
+    rho[i] = rho_eos * pow(D_rho - gmo * (Phi_Hot_Halo_D3D(r[i], hdp) - Phi_0) / (cs * cs), 1. / gmo);
   }
 }
 
-/*! \fn void hydrostatic_column_isothermal_D3D(Real *rho, Real R, Real *hdp, Real dz, int nz, int ng)
- *  \brief Calculate the 1D density distribution in a hydrostatic column, assuming an isothermal gas.
-     Uses an iterative to scheme to determine the density at (R, z=0) relative to (R=0,z=0),
-     then sets the densities according to an analytic expression. */
-void hydrostatic_column_isothermal_D3D(Real *rho, Real R, Real *hdp, Real dz, int nz, int ng)
+/*! \fn void hydrostatic_column_isothermal_D3D(Real *rho, Real R, Real *hdp,
+ Real dz, int nz, int ng)
+ *  \brief Calculate the 1D density distribution in a hydrostatic column,
+ assuming an isothermal gas. Uses an iterative to scheme to determine the
+ density at (R, z=0) relative to (R=0,z=0), then sets the densities according to
+ an analytic expression. */
+void Hydrostatic_Column_Isothermal_D3D(Real *rho, Real R, Real *hdp, Real dz, int nz, int ng)
 {
-  //x is cell center in x direction
-  //y is cell center in y direction
-  //dz is cell width in z direction
-  //nz is number of real cells
-  //ng is number of ghost cells
-  //total number of cells in column is nz * 2*ng
-  //hdp[0] = M_vir;
-  //hdp[1] = M_d;
-  //hdp[2] = M_h;
-  //hdp[3] = R_vir;
-  //hdp[4] = c_vir;
-  //hdp[5] = R_s;
-  //hdp[6] = R_d;
-  //hdp[7] = z_d;
-  //hdp[8] = T_d;
-  //hdp[9] = Sigma_0;
-  //hdp[10] = R_g;
-  //hdp[11] = H_g;
-  //hdp[12] = K_eos;
-  //hdp[13] = gamma;
-  //hdp[14] = rho_floor;
-  //hdp[15] = rho_eos;
-  //hdp[16] = cs;
-
-  int i,k;        //index along z axis
-  int nzt;      //total number of cells in z-direction
-  Real Sigma_r; //surface density expected at r
-
-  Real cs      = hdp[16];
-
-  Real Phi_0; //potential at z=0
-
-  Real rho_0; //density at mid plane
-  Real D_rho; //ratio of density at mid plane and rho_eos
-
-  Real z_0, z_1; // heights for iteration
+  // x is cell center in x direction
+  // y is cell center in y direction
+  // dz is cell width in z direction
+  // nz is number of real cells
+  // ng is number of ghost cells
+  // total number of cells in column is nz * 2*ng
+  // hdp[0] = M_vir;
+  // hdp[1] = M_d;
+  // hdp[2] = M_h;
+  // hdp[3] = R_vir;
+  // hdp[4] = c_vir;
+  // hdp[5] = R_s;
+  // hdp[6] = R_d;
+  // hdp[7] = z_d;
+  // hdp[8] = T_d;
+  // hdp[9] = Sigma_0;
+  // hdp[10] = R_g;
+  // hdp[11] = H_g;
+  // hdp[12] = K_eos;
+  // hdp[13] = gamma;
+  // hdp[14] = rho_floor;
+  // hdp[15] = rho_eos;
+  // hdp[16] = cs;
+
+  int i, k;      // index along z axis
+  int nzt;       // total number of cells in z-direction
+  Real Sigma_r;  // surface density expected at r
+
+  Real cs = hdp[16];
+
+  Real Phi_0;  // potential at z=0
+
+  Real rho_0;  // density at mid plane
+  Real D_rho;  // ratio of density at mid plane and rho_eos
+
+  Real z_0, z_1;  // heights for iteration
   Real z_disk_max;
 
-  //density integration
+  // density integration
   Real phi_int;
   Real z_int_min, z_int_max, dz_int;
   Real Delta_phi;
   int n_int = 1000;
-  int flag; //flag for integration
-
-  int ks; //start of integrals above disk plane
-  int km; //mirror of k
-  if(nz%2)
-  {
-    ks = ng+(nz-1)/2;
-  }else{
-    ks = ng + nz/2;
+  int flag;  // flag for integration
+
+  int ks;  // start of integrals above disk plane
+  int km;  // mirror of k
+  if (nz % 2) {
+    ks = ng + (nz - 1) / 2;
+  } else {
+    ks = ng + nz / 2;
   }
 
   // get the disk surface density
   // have verified that at this point, Sigma_r is correct
-  Sigma_r = Sigma_disk_D3D(R, hdp);
+  Sigma_r = Sigma_Disk_D3D(R, hdp);
 
-  //set the z-column size, including ghost cells
-  nzt = nz + 2*ng;
+  // set the z-column size, including ghost cells
+  nzt = nz + 2 * ng;
 
-  //compute the mid plane potential
-  Phi_0 = phi_total_D3D(R,0,hdp);
+  // compute the mid plane potential
+  Phi_0 = Phi_Total_D3D(R, 0, hdp);
 
   /* For an isothermal gas, we have
 
@@ -309,409 +305,399 @@ void hydrostatic_column_isothermal_D3D(Real *rho, Real R, Real *hdp, Real dz, in
 
   */
 
-  //perform a simple check about the fraction of density within
-  //a single cell
-  z_1   = z_hc_D3D(ks,dz,nz,ng) + 0.5*dz;//cell ceiling
-  D_rho = (phi_total_D3D(R,z_1,hdp)-Phi_0)/(cs*cs);
-
-  if(exp(-1*D_rho)<0.1)
-    printf("WARNING: >0.9 density in single cell R %e D_rho %e z_1 %e Phi(z) %e Phi_0 %E cs %e\n",R,D_rho,z_1,phi_total_D3D(R,z_1,hdp),Phi_0,cs);
+  // perform a simple check about the fraction of density within
+  // a single cell
+  z_1   = Z_Hc_D3D(ks, dz, nz, ng) + 0.5 * dz;  // cell ceiling
+  D_rho = (Phi_Total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs);
 
+  if (exp(-1 * D_rho) < 0.1) {
+    printf(
+        "WARNING: >0.9 density in single cell R %e D_rho %e z_1 %e Phi(z) %e "
+        "Phi_0 %E cs %e\n",
+        R, D_rho, z_1, Phi_Total_D3D(R, z_1, hdp), Phi_0, cs);
+  }
 
-  //let's find the cell above the disk where the
-  //density falls by exp(-7) < 1.0e-3.
-  for(k=ks;k<nzt;k++)
-  {
-    z_1   = z_hc_D3D(k,dz,nz,ng) + 0.5*dz;//cell ceiling
-    D_rho = (phi_total_D3D(R,z_1,hdp)-Phi_0)/(cs*cs);
-    if(D_rho>=7.0)
+  // let's find the cell above the disk where the
+  // density falls by exp(-7) < 1.0e-3.
+  for (k = ks; k < nzt; k++) {
+    z_1   = Z_Hc_D3D(k, dz, nz, ng) + 0.5 * dz;  // cell ceiling
+    D_rho = (Phi_Total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs);
+    if (D_rho >= 7.0) {
       break;
+    }
   }
-  //if(R<1.0)
-  //  printf("Cells above disk (k-ks) = %d, z_1 = %e, exp(-D) = %e, R = %e\n",k-ks,z_1,exp(-1*D_rho),R);
+  // if(R<1.0)
+  //   printf("Cells above disk (k-ks) = %d, z_1 = %e, exp(-D) = %e, R =
+  //   %e\n",k-ks,z_1,exp(-1*D_rho),R);
 
-  //now we can compute the unnormalized integral of the density
+  // now we can compute the unnormalized integral of the density
   z_disk_max = z_1;
 
-  //Compute surface density
-  z_int_min = 0.0; //kpc
-  z_int_max = z_1; //kpc
-  dz_int = (z_int_max-z_int_min)/((Real) (n_int));
-  phi_int = 0.0;
-  for(k=0;k<n_int;k++)
-  {
-    z_0 = 0.5*dz_int + dz_int*((Real) k);
-    Delta_phi = (phi_total_D3D(R,z_0,hdp)-Phi_0)/(cs*cs);
-    phi_int  += exp(-1*Delta_phi)*dz_int;
+  // Compute surface density
+  z_int_min = 0.0;  // kpc
+  z_int_max = z_1;  // kpc
+  dz_int    = (z_int_max - z_int_min) / ((Real)(n_int));
+  phi_int   = 0.0;
+  for (k = 0; k < n_int; k++) {
+    z_0       = 0.5 * dz_int + dz_int * ((Real)k);
+    Delta_phi = (Phi_Total_D3D(R, z_0, hdp) - Phi_0) / (cs * cs);
+    phi_int += exp(-1 * Delta_phi) * dz_int;
   }
 
-  //compute the central density
-  rho_0 = 0.5*Sigma_r/phi_int;
+  // compute the central density
+  rho_0 = 0.5 * Sigma_r / phi_int;
 
-  //OK, at this stage we know how to set the densities
-  //so let's take cell averages
+  // OK, at this stage we know how to set the densities
+  // so let's take cell averages
   flag  = 0;
-  n_int = 10; // integrate over a 1/10 cell
-  for(k=ks;k<nzt;k++)
-  {
-    //find cell center, bottom, and top
-    z_int_min  = z_hc_D3D(k,dz,nz,ng) - 0.5*dz;
-    z_int_max  = z_hc_D3D(k,dz,nz,ng) + 0.5*dz;
-    if(z_int_max>z_disk_max)
+  n_int = 10;  // integrate over a 1/10 cell
+  for (k = ks; k < nzt; k++) {
+    // find cell center, bottom, and top
+    z_int_min = Z_Hc_D3D(k, dz, nz, ng) - 0.5 * dz;
+    z_int_max = Z_Hc_D3D(k, dz, nz, ng) + 0.5 * dz;
+    if (z_int_max > z_disk_max) {
       z_int_max = z_disk_max;
-    if(!flag)
-    {
-      dz_int = (z_int_max-z_int_min)/((Real) (n_int));
+    }
+    if (!flag) {
+      dz_int  = (z_int_max - z_int_min) / ((Real)(n_int));
       phi_int = 0.0;
-      for(i=0;i<n_int;i++)
-      {
-        z_0 = 0.5*dz_int + dz_int*((Real) i) + z_int_min;
-        Delta_phi = (phi_total_D3D(R,z_0,hdp)-Phi_0)/(cs*cs);
-        phi_int += rho_0*exp(-1*Delta_phi)*dz_int;
+      for (i = 0; i < n_int; i++) {
+        z_0       = 0.5 * dz_int + dz_int * ((Real)i) + z_int_min;
+        Delta_phi = (Phi_Total_D3D(R, z_0, hdp) - Phi_0) / (cs * cs);
+        phi_int += rho_0 * exp(-1 * Delta_phi) * dz_int;
       }
 
-      //set density based on integral
-      //of density in this cell
-      rho[k] = phi_int/dz;
+      // set density based on integral
+      // of density in this cell
+      rho[k] = phi_int / dz;
 
-      if(z_int_max==z_disk_max)
-      {
+      if (z_int_max == z_disk_max) {
         flag = 1;
       }
-    }else{
-      //no mass up here!
+    } else {
+      // no mass up here!
       rho[k] = 0;
     }
 
-    //mirror densities
-    //above and below disk plane
-    if(nz%2)
-    {
-      km = (ng+(nz-1)/2) - (k-ks);
-    }else{
-      km = ng + nz/2 - (k-ks) -1;
+    // mirror densities
+    // above and below disk plane
+    if (nz % 2) {
+      km = (ng + (nz - 1) / 2) - (k - ks);
+    } else {
+      km = ng + nz / 2 - (k - ks) - 1;
     }
     rho[km] = rho[k];
   }
 
-  //check the surface density
+  // check the surface density
   phi_int = 0.0;
-  for(k=0;k<nzt;k++)
-    phi_int += rho[k]*dz;
+  for (k = 0; k < nzt; k++) {
+    phi_int += rho[k] * dz;
+  }
 
-  //printf("Surface density check R %e Sigma_r %e integral(rho*dz) %e\n",R,Sigma_r,phi_int);
-  //printf("Done with isothermal disk.\n");
+  // printf("Surface density check R %e Sigma_r %e integral(rho*dz)
+  // %e\n",R,Sigma_r,phi_int); printf("Done with isothermal disk.\n");
 }
 
-/*! \fn void hydrostatic_column_analytical_D3D(Real *rho, Real R, Real *hdp, Real dz, int nz, int ng)
+/*! \fn void hydrostatic_column_analytical_D3D(Real *rho, Real R, Real *hdp,
+ Real dz, int nz, int ng)
  *  \brief Calculate the 1D density distribution in a hydrostatic column.
-     Uses an iterative to scheme to determine the density at (R, z=0) relative to (R=0,z=0),
-     then sets the densities according to an analytic expression. */
-void hydrostatic_column_analytical_D3D(Real *rho, Real R, Real *hdp, Real dz, int nz, int ng)
+     Uses an iterative to scheme to determine the density at (R, z=0) relative
+ to (R=0,z=0), then sets the densities according to an analytic expression. */
+void Hydrostatic_Column_Analytical_D3D(Real *rho, Real R, Real *hdp, Real dz, int nz, int ng)
 {
-  //x is cell center in x direction
-  //y is cell center in y direction
-  //dz is cell width in z direction
-  //nz is number of real cells
-  //ng is number of ghost cells
-  //total number of cells in column is nz * 2*ng
-  //hdp[0] = M_vir;
-  //hdp[1] = M_d;
-  //hdp[2] = M_h;
-  //hdp[3] = R_vir;
-  //hdp[4] = c_vir;
-  //hdp[5] = R_s;
-  //hdp[6] = R_d;
-  //hdp[7] = z_d;
-  //hdp[8] = T_d;
-  //hdp[9] = Sigma_0;
-  //hdp[10] = R_g;
-  //hdp[11] = H_g;
-  //hdp[12] = K_eos;
-  //hdp[13] = gamma;
-  //hdp[14] = rho_floor;
-  //hdp[15] = rho_eos;
-  //hdp[16] = cs;
-
-  int i,k;        //index along z axis
-  int nzt;      //total number of cells in z-direction
-  Real Sigma_r; //surface density expected at r
-  Real Sigma_0 = hdp[9]; //central surface density
-  Real gamma = hdp[13];
-  //Real gamma = 1.001; // CHANGED FOR ISOTHERMAL
+  // x is cell center in x direction
+  // y is cell center in y direction
+  // dz is cell width in z direction
+  // nz is number of real cells
+  // ng is number of ghost cells
+  // total number of cells in column is nz * 2*ng
+  // hdp[0] = M_vir;
+  // hdp[1] = M_d;
+  // hdp[2] = M_h;
+  // hdp[3] = R_vir;
+  // hdp[4] = c_vir;
+  // hdp[5] = R_s;
+  // hdp[6] = R_d;
+  // hdp[7] = z_d;
+  // hdp[8] = T_d;
+  // hdp[9] = Sigma_0;
+  // hdp[10] = R_g;
+  // hdp[11] = H_g;
+  // hdp[12] = K_eos;
+  // hdp[13] = gamma;
+  // hdp[14] = rho_floor;
+  // hdp[15] = rho_eos;
+  // hdp[16] = cs;
+
+  int i, k;               // index along z axis
+  int nzt;                // total number of cells in z-direction
+  Real Sigma_r;           // surface density expected at r
+  Real Sigma_0 = hdp[9];  // central surface density
+  Real gamma   = hdp[13];
+  // Real gamma = 1.001; // CHANGED FOR ISOTHERMAL
 
   Real rho_eos = hdp[15];
   Real cs      = hdp[16];
 
-  Real Phi_0; //potential at z=0
+  Real Phi_0;  // potential at z=0
 
-  Real D_rho; //ratio of density at mid plane and rho_eos
-  Real D_new; //new ratio of density at mid plane and rho_eos
+  Real D_rho;  // ratio of density at mid plane and rho_eos
+  Real D_new;  // new ratio of density at mid plane and rho_eos
 
-  Real z_0, z_1, z_2; // heights for iteration
+  Real z_0, z_1, z_2;  // heights for iteration
   Real z_disk_max;
-  Real A_0, A_1; // density function to find roots
+  Real A_0, A_1;  // density function to find roots
 
-  //density integration
+  // density integration
   Real phi_int, A;
   Real z_int_min, z_int_max, dz_int;
   Real Delta_phi;
   int n_int = 1000;
 
-  //tolerance for secant method
+  // tolerance for secant method
   Real tol = 1.0e-6;
 
-  //tolerance for surface density
-  //fractional
+  // tolerance for surface density
+  // fractional
   Real D_tol = 1.0e-5;
 
-
-  int ks; //start of integrals above disk plane
-  int km; //mirror of k
-  if(nz%2)
-  {
-    ks = ng+(nz-1)/2;
-  }else{
-    ks = ng + nz/2;
+  int ks;  // start of integrals above disk plane
+  int km;  // mirror of k
+  if (nz % 2) {
+    ks = ng + (nz - 1) / 2;
+  } else {
+    ks = ng + nz / 2;
   }
 
   // get the disk surface density
   // have verified that at this point, Sigma_r is correct
-  Sigma_r = Sigma_disk_D3D(R, hdp);
+  Sigma_r = Sigma_Disk_D3D(R, hdp);
 
-  //set the z-column size, including ghost cells
-  nzt = nz + 2*ng;
+  // set the z-column size, including ghost cells
+  nzt = nz + 2 * ng;
 
-  //compute the mid plane potential
-  Phi_0 = phi_total_D3D(R,0,hdp);
+  // compute the mid plane potential
+  Phi_0 = Phi_Total_D3D(R, 0, hdp);
 
-  //pick a fiducial guess for density ratio
-  D_rho = pow( Sigma_r/Sigma_0, gamma-1. );
+  // pick a fiducial guess for density ratio
+  D_rho = pow(Sigma_r / Sigma_0, gamma - 1.);
 
-  //begin iterative determination of density field
+  // begin iterative determination of density field
   int flag = 0;
-  int iter = 0; //number if iterations
-  int flag_phi; //flag for density extent
+  int iter = 0;  // number if iterations
+  int flag_phi;  // flag for density extent
   int iter_phi;
 
   D_new = D_rho;
-  while(!flag)
-  {
-    //iterate the density ratio
+  while (!flag) {
+    // iterate the density ratio
     D_rho = D_new;
 
-    //first determine the range of z where
-    //the density above the central disk plane is
-    //non-zero
+    // first determine the range of z where
+    // the density above the central disk plane is
+    // non-zero
 
-    //get started, find the maximum extent of the disk
+    // get started, find the maximum extent of the disk
     iter_phi = 0;
     flag_phi = 0;
-    z_0 = 1.0e-3;
-    z_1 = 1.0e-2;
-    while(!flag_phi)
-    {
-      A_0 = D_rho - (phi_total_D3D(R,z_0,hdp)-Phi_0)/(cs*cs);
-      A_1 = D_rho - (phi_total_D3D(R,z_1,hdp)-Phi_0)/(cs*cs);
-      z_2 = z_1 - A_1*(z_1-z_0)/(A_1-A_0);
-      if( fabs(z_2-z_1)/fabs(z_1) > 10.)
-        z_2 = 10.*z_1;
-      //advance limit
+    z_0      = 1.0e-3;
+    z_1      = 1.0e-2;
+    while (!flag_phi) {
+      A_0 = D_rho - (Phi_Total_D3D(R, z_0, hdp) - Phi_0) / (cs * cs);
+      A_1 = D_rho - (Phi_Total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs);
+      z_2 = z_1 - A_1 * (z_1 - z_0) / (A_1 - A_0);
+      if (fabs(z_2 - z_1) / fabs(z_1) > 10.) {
+        z_2 = 10. * z_1;
+      }
+      // advance limit
       z_0 = z_1;
       z_1 = z_2;
 
-      if(fabs(z_1-z_0)<tol)
-      {
+      if (fabs(z_1 - z_0) < tol) {
         flag_phi = 1;
-        A_0 = D_rho - (phi_total_D3D(R,z_0,hdp)-Phi_0)/(cs*cs);
-        A_1 = D_rho - (phi_total_D3D(R,z_1,hdp)-Phi_0)/(cs*cs);
-        //make sure we haven't crossed 0
-        if(A_1<0)
+        A_0      = D_rho - (Phi_Total_D3D(R, z_0, hdp) - Phi_0) / (cs * cs);
+        A_1      = D_rho - (Phi_Total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs);
+        // make sure we haven't crossed 0
+        if (A_1 < 0) {
           z_1 = z_0;
+        }
       }
       iter_phi++;
-      if(iter_phi>1000)
-      {
+      if (iter_phi > 1000) {
         printf("Something wrong in determining central density...\n");
-        printf("iter_phi = %d\n",iter_phi);
-        printf("z_0 %e z_1 %e z_2 %e A_0 %e A_1 %e phi_0 %e phi_1 %e\n",z_0,z_1,z_2,A_0,A_1,phi_total_D3D(R,z_0,hdp),phi_total_D3D(R,z_1,hdp));
-        #ifdef MPI_CHOLLA
+        printf("iter_phi = %d\n", iter_phi);
+        printf("z_0 %e z_1 %e z_2 %e A_0 %e A_1 %e phi_0 %e phi_1 %e\n", z_0, z_1, z_2, A_0, A_1,
+               Phi_Total_D3D(R, z_0, hdp), Phi_Total_D3D(R, z_1, hdp));
+#ifdef MPI_CHOLLA
         MPI_Finalize();
-        #endif
+#endif
         exit(0);
       }
     }
-    A_1 = D_rho - (phi_total_D3D(R,z_1,hdp)-Phi_0)/(cs*cs);
+    A_1        = D_rho - (Phi_Total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs);
     z_disk_max = z_1;
 
-    //Compute surface density
-    z_int_min = 0.0; //kpc
-    z_int_max = z_1; //kpc
-    dz_int = (z_int_max-z_int_min)/((Real) (n_int));
-    phi_int = 0.0;
-    for(k=0;k<n_int;k++)
-    {
-      z_0 = 0.5*dz_int + dz_int*((Real) k);
-      Delta_phi = (phi_total_D3D(R,z_0,hdp)-Phi_0)/(cs*cs);
-      A = D_rho - Delta_phi;
-      phi_int += rho_eos*pow((gamma-1)*A,1./(gamma-1.))*dz_int;
+    // Compute surface density
+    z_int_min = 0.0;  // kpc
+    z_int_max = z_1;  // kpc
+    dz_int    = (z_int_max - z_int_min) / ((Real)(n_int));
+    phi_int   = 0.0;
+    for (k = 0; k < n_int; k++) {
+      z_0       = 0.5 * dz_int + dz_int * ((Real)k);
+      Delta_phi = (Phi_Total_D3D(R, z_0, hdp) - Phi_0) / (cs * cs);
+      A         = D_rho - Delta_phi;
+      phi_int += rho_eos * pow((gamma - 1) * A, 1. / (gamma - 1.)) * dz_int;
     }
 
-    //update density constant
-    D_new = D_rho * pow(phi_int/(0.5*Sigma_r),1.-gamma);
+    // update density constant
+    D_new = D_rho * pow(phi_int / (0.5 * Sigma_r), 1. - gamma);
 
-    //if we have converged, exit!
-    if(fabs(phi_int-0.5*Sigma_r)/(0.5*Sigma_r)<D_tol)
-    {
-      //done!
+    // if we have converged, exit!
+    if (fabs(phi_int - 0.5 * Sigma_r) / (0.5 * Sigma_r) < D_tol) {
+      // done!
       flag = 1;
     }
 
     iter++;
 
-    if(iter>100)
-    {
+    if (iter > 100) {
       printf("About to exit...\n");
-      #ifdef MPI_CHOLLA
+#ifdef MPI_CHOLLA
       MPI_Finalize();
-      #endif
+#endif
       exit(0);
     }
   }
 
-  //OK, at this stage we know how to set the densities
-  //so let's take cell averages
+  // OK, at this stage we know how to set the densities
+  // so let's take cell averages
   flag  = 0;
-  n_int = 10; // integrate over a 1/10 cell
-  for(k=ks;k<nzt;k++)
-  {
-    //find cell center, bottom, and top
-    z_int_min  = z_hc_D3D(k,dz,nz,ng) - 0.5*dz;
-    z_int_max  = z_hc_D3D(k,dz,nz,ng) + 0.5*dz;
-    if(z_int_max>z_disk_max)
+  n_int = 10;  // integrate over a 1/10 cell
+  for (k = ks; k < nzt; k++) {
+    // find cell center, bottom, and top
+    z_int_min = Z_Hc_D3D(k, dz, nz, ng) - 0.5 * dz;
+    z_int_max = Z_Hc_D3D(k, dz, nz, ng) + 0.5 * dz;
+    if (z_int_max > z_disk_max) {
       z_int_max = z_disk_max;
-    if(!flag)
-    {
-      dz_int = (z_int_max-z_int_min)/((Real) (n_int));
+    }
+    if (!flag) {
+      dz_int  = (z_int_max - z_int_min) / ((Real)(n_int));
       phi_int = 0.0;
-      for(i=0;i<n_int;i++)
-      {
-        z_0 = 0.5*dz_int + dz_int*((Real) i) + z_int_min;
-        Delta_phi = (phi_total_D3D(R,z_0,hdp)-Phi_0)/(cs*cs);
-        A = D_rho - Delta_phi;
-        phi_int += rho_eos*pow((gamma-1)*A,1./(gamma-1.))*dz_int;
+      for (i = 0; i < n_int; i++) {
+        z_0       = 0.5 * dz_int + dz_int * ((Real)i) + z_int_min;
+        Delta_phi = (Phi_Total_D3D(R, z_0, hdp) - Phi_0) / (cs * cs);
+        A         = D_rho - Delta_phi;
+        phi_int += rho_eos * pow((gamma - 1) * A, 1. / (gamma - 1.)) * dz_int;
       }
 
-      //set density based on integral
-      //of density in this cell
-      rho[k] = phi_int/dz;
+      // set density based on integral
+      // of density in this cell
+      rho[k] = phi_int / dz;
 
-      if(z_int_max==z_disk_max)
-      {
+      if (z_int_max == z_disk_max) {
         flag = 1;
       }
-    }else{
-      //no mass up here!
+    } else {
+      // no mass up here!
       rho[k] = 0;
     }
 
-    //mirror densities
-    //above and below disk plane
-    if(nz%2)
-    {
-      km = (ng+(nz-1)/2) - (k-ks);
-    }else{
-      km = ng + nz/2 - (k-ks) -1;
+    // mirror densities
+    // above and below disk plane
+    if (nz % 2) {
+      km = (ng + (nz - 1) / 2) - (k - ks);
+    } else {
+      km = ng + nz / 2 - (k - ks) - 1;
     }
-    rho[km] = rho[k];
-    Delta_phi = (phi_total_D3D(R,z_hc_D3D(k,dz,nz,ng),hdp)-Phi_0)/(cs*cs);
+    rho[km]   = rho[k];
+    Delta_phi = (Phi_Total_D3D(R, Z_Hc_D3D(k, dz, nz, ng), hdp) - Phi_0) / (cs * cs);
   }
 
-  //check the surface density
+  // check the surface density
   phi_int = 0.0;
-  for(k=0;k<nzt;k++)
-    phi_int += rho[k]*dz;
+  for (k = 0; k < nzt; k++) {
+    phi_int += rho[k] * dz;
+  }
 }
 
-
-Real determine_rho_eos_D3D(Real cs, Real Sigma_0, Real *hdp)
+Real Determine_Rho_EOS_D3D(Real cs, Real Sigma_0, Real *hdp)
 {
-  //OK, we need to set rho_eos based on the central surface density.
-  //and the central potential
+  // OK, we need to set rho_eos based on the central surface density.
+  // and the central potential
   int k;
   Real z_pos, rho_eos;
-  Real Phi_0 = phi_total_D3D(0,0,hdp);
+  Real Phi_0 = Phi_Total_D3D(0, 0, hdp);
   Real gamma = hdp[13];
-  //Real gamma = 1.001; // CHANGED FOR ISOTHERMAL
+  // Real gamma = 1.001; // CHANGED FOR ISOTHERMAL
   Real Delta_phi;
   Real A = 0.0;
 
-  //determine the maximum scale height by finding the
-  //zero crossing of 1-(Phi-Phi_0)/cs^2
-  //using the secant method
+  // determine the maximum scale height by finding the
+  // zero crossing of 1-(Phi-Phi_0)/cs^2
+  // using the secant method
   Real z_0, z_1, z_2, A_0, A_1;
-  Real tol = 1.0e-6;
+  Real tol     = 1.0e-6;
   int flag_phi = 0;
   int iter_phi = 0;
 
-  //get started
+  // get started
   z_0 = 1.0e-3;
   z_1 = 1.0e-2;
-  while(!flag_phi)
-  {
-    A_0 = 1.0 - (phi_total_D3D(0,z_0,hdp)-Phi_0)/(cs*cs);
-    A_1 = 1.0 - (phi_total_D3D(0,z_1,hdp)-Phi_0)/(cs*cs);
-    z_2 = z_1 - A_1*(z_1-z_0)/(A_1-A_0);
+  while (!flag_phi) {
+    A_0 = 1.0 - (Phi_Total_D3D(0, z_0, hdp) - Phi_0) / (cs * cs);
+    A_1 = 1.0 - (Phi_Total_D3D(0, z_1, hdp) - Phi_0) / (cs * cs);
+    z_2 = z_1 - A_1 * (z_1 - z_0) / (A_1 - A_0);
 
-    if( fabs(z_2-z_1)/fabs(z_1) > 10.)
-      z_2 = 10.*z_1;
+    if (fabs(z_2 - z_1) / fabs(z_1) > 10.) {
+      z_2 = 10. * z_1;
+    }
 
-    //advance limit
+    // advance limit
     z_0 = z_1;
     z_1 = z_2;
 
-    //printf("z_0 %e z_1 %e\n",z_0,z_1);
-    if(fabs(z_1-z_0)<tol)
-    {
+    // printf("z_0 %e z_1 %e\n",z_0,z_1);
+    if (fabs(z_1 - z_0) < tol) {
       flag_phi = 1;
-      A_0 = 1.0 - (phi_total_D3D(0,z_0,hdp)-Phi_0)/(cs*cs);
-      A_1 = 1.0 - (phi_total_D3D(0,z_1,hdp)-Phi_0)/(cs*cs);
-      //make sure we haven't crossed 0
-      if(A_1<0)
+      A_0      = 1.0 - (Phi_Total_D3D(0, z_0, hdp) - Phi_0) / (cs * cs);
+      A_1      = 1.0 - (Phi_Total_D3D(0, z_1, hdp) - Phi_0) / (cs * cs);
+      // make sure we haven't crossed 0
+      if (A_1 < 0) {
         z_1 = z_0;
+      }
     }
     iter_phi++;
-    if(iter_phi>1000)
-    {
+    if (iter_phi > 1000) {
       printf("Something wrong in determining central density...\n");
-      printf("iter_phi = %d\n",iter_phi);
-      printf("z_0 %e z_1 %e z_2 %e A_0 %e A_1 %e phi_0 %e phi_1 %e\n",z_0,z_1,z_2,A_0,A_1,phi_total_D3D(0,z_0,hdp),phi_total_D3D(0,z_1,hdp));
-      #ifdef MPI_CHOLLA
+      printf("iter_phi = %d\n", iter_phi);
+      printf("z_0 %e z_1 %e z_2 %e A_0 %e A_1 %e phi_0 %e phi_1 %e\n", z_0, z_1, z_2, A_0, A_1,
+             Phi_Total_D3D(0, z_0, hdp), Phi_Total_D3D(0, z_1, hdp));
+#ifdef MPI_CHOLLA
       MPI_Finalize();
-      #endif
+#endif
       exit(0);
     }
   }
 
-  //generate a high resolution density and z profile
-  int n_int = 1000;
-  Real z_int_min = 0.0; //kpc
-  Real z_int_max = z_1; //kpc
-  Real dz_int = (z_int_max-z_int_min)/((Real) (n_int));
-  Real phi_int = 0.0;
-
-  //now integrate the density profile
-  for(k=0;k<n_int;k++)
-  {
-    z_pos = 0.5*dz_int + dz_int*((Real) k);
-    Delta_phi = phi_total_D3D(0,z_pos,hdp)-Phi_0;
-    A = 1.0 - Delta_phi/(cs*cs);
-    phi_int += pow((gamma-1)*A,1./(gamma-1.))*dz_int;
+  // generate a high resolution density and z profile
+  int n_int      = 1000;
+  Real z_int_min = 0.0;  // kpc
+  Real z_int_max = z_1;  // kpc
+  Real dz_int    = (z_int_max - z_int_min) / ((Real)(n_int));
+  Real phi_int   = 0.0;
+
+  // now integrate the density profile
+  for (k = 0; k < n_int; k++) {
+    z_pos     = 0.5 * dz_int + dz_int * ((Real)k);
+    Delta_phi = Phi_Total_D3D(0, z_pos, hdp) - Phi_0;
+    A         = 1.0 - Delta_phi / (cs * cs);
+    phi_int += pow((gamma - 1) * A, 1. / (gamma - 1.)) * dz_int;
   }
-  //use the potential integral to set central density at r=0
-  rho_eos = 0.5*Sigma_0/phi_int;
+  // use the potential integral to set central density at r=0
+  rho_eos = 0.5 * Sigma_0 / phi_int;
 
-  //check
+  // check
   /*
   phi_int = 0.0;
   for(k=0;k<n_int;k++)
@@ -723,43 +709,34 @@ Real determine_rho_eos_D3D(Real cs, Real Sigma_0, Real *hdp)
   printf("phi_int %e Sigma_0/2 %e\n",phi_int,0.5*Sigma_0);
   */
 
-  //return the central density
+  // return the central density
   return rho_eos;
 }
 
-
-
-Real halo_density_D3D(Real r, Real *r_halo, Real *rho_halo, Real dr, int nr)
+Real Halo_Density_D3D(Real r, Real *r_halo, Real *rho_halo, Real dr, int nr)
 {
-  //interpolate the halo density profile
+  // interpolate the halo density profile
   int i;
 
-  //find the index of the current
-  //position in r_halo (based on r_hc_D3D)
-  i = (int) ((r - 0.5*dr)/dr );
-  if(i<0||i>=nr-1)
-  {
-    if(i<0)
-    {
+  // find the index of the current
+  // position in r_halo (based on r_hc_D3D)
+  i = (int)((r - 0.5 * dr) / dr);
+  if (i < 0 || i >= nr - 1) {
+    if (i < 0) {
       i = 0;
-    }else{
-      i = nr-2;
+    } else {
+      i = nr - 2;
     }
   }
   // return the interpolated density profile
-  return (rho_halo[i+1] - rho_halo[i])*(r - r_halo[i])/(r_halo[i+1]-r_halo[i]) + rho_halo[i];
+  return (rho_halo[i + 1] - rho_halo[i]) * (r - r_halo[i]) / (r_halo[i + 1] - r_halo[i]) + rho_halo[i];
 }
 
-
-
-
-
-/*! \fn void Disk_3D(parameters P)
+/*! \fn void Disk_3D(Parameters P )
  *  \brief Initialize the grid with a 3D disk. */
-void Grid3D::Disk_3D(parameters p)
+void Grid3D::Disk_3D(Parameters p)
 {
-
-  #ifdef DISK_ICS
+#ifdef DISK_ICS
 
   int i, j, k, id;
   Real x_pos, y_pos, z_pos, r, phi;
@@ -771,71 +748,66 @@ void Grid3D::Disk_3D(parameters p)
   Real r_cool;
 
   // MW model
-  M_vir = 1.0e12; // viral mass of MW in M_sun
-  M_d = 6.5e10; // mass of disk in M_sun (assume all stars)
-  R_d = 3.5; // MW stellar disk scale length in kpc
-  z_d = 3.5/5.0; // MW stellar disk scale height in kpc
-  R_vir = 261; // MW viral radius in kpc
-  c_vir = 20; // MW halo concentration (to account for adiabatic contraction)
-  r_cool = 157.0; // cooling radius in kpc (MW)
-
-  // M82 model
-  //M_vir = 5.0e10; // viral mass of M82 in M_sun (guess)
-  //M_d = 1.0e10; // mass of M82 disk in M_sun (Greco 2012)
-  //R_d = 0.8; // M82 stellar disk scale length in kpc (Mayya 2009)
-  //z_d = 0.15; // M82 stellar thin disk scale height in kpc (Lim 2013)
-  //R_vir = R_d/0.015; // M82 viral radius in kpc from R_(1/2) = 0.015 R_200 (Kravtsov 2013)
-  //c_vir = 10; // M82 halo concentration
-  //r_cool = 100.0; // cooling in kpc (M82, guess)
-
-  M_h = M_vir - M_d; // halo mass in M_sun
-  R_s = R_vir / c_vir; // halo scale length in kpc
-  //T_d = 5.9406e5; // SET TO MATCH K_EOS SET BY HAND for K_eos   = 1.859984e-14
-  //T_d = 2.0e5;
-  T_d = 1.0e4; // CHANGED FOR ISOTHERMAL
-  T_h = 1.0e6; // halo temperature, at density floor
-  rho_eos = 1.0e7; //gas eos normalized at 1e7 Msun/kpc^3
-  rho_eos_h = 3.0e3; //gas eos normalized at 3e3 Msun/kpc^3 (about n_h = 10^-3.5)
-  mu = 0.6;
-
-  R_g = 2.0*R_d;   //gas scale length in kpc
-  Sigma_0 = 0.25*M_d/(2*M_PI*R_g*R_g); //central surface density in Msun/kpc^2
-  H_g = z_d; //initial guess for gas scale height
-  //rho_floor = 1.0e3; //ICs minimum density in Msun/kpc^3
-
-  //EOS info
-  cs = sqrt(KB*T_d/(mu*MP))*TIME_UNIT/LENGTH_UNIT; //sound speed in kpc/kyr
-  cs_h = sqrt(KB*T_h/(mu*MP))*TIME_UNIT/LENGTH_UNIT; //sound speed in kpc/kyr
-
-  //set some initial parameters
-  int nhdp = 21;  //number of parameters to pass hydrostatic column
-  Real *hdp = (Real *) calloc(nhdp,sizeof(Real));  //parameters
-  hdp[0] = M_vir;
-  hdp[1] = M_d;
-  hdp[2] = M_h;
-  hdp[3] = R_vir;
-  hdp[4] = c_vir;
-  hdp[5] = R_s;
-  hdp[6] = R_d;
-  hdp[7] = z_d;
-  hdp[8] = T_d;
-  hdp[9] = Sigma_0;
-  hdp[10] = R_g;
-  hdp[11] = H_g;
-  hdp[13] = p.gamma;
-
-  //determine rho_eos by setting central density of disk
-  //based on central temperature
-  rho_eos = determine_rho_eos_D3D(cs, Sigma_0, hdp);
-
-  //set EOS parameters
-  //K_eos = cs*cs*pow(rho_eos,1.0-p.gamma)/p.gamma; //P = K\rho^gamma
-  K_eos = cs*cs*rho_eos; // CHANGED FOR ISOTHERMAL
-  K_eos_h = cs_h*cs_h*pow(rho_eos_h,1.0-p.gamma)/p.gamma;
-
-  //Store remaining parameters
+  DiskGalaxy galaxy = galaxies::MW;  // NOLINT(cppcoreguidelines-slicing)
+  // M82 model galaxies::M82;
+
+  M_vir = galaxy.getM_vir();    // viral mass in M_sun
+  M_d   = galaxy.getM_d();      // mass of disk in M_sun (assume all stars)
+  R_d   = galaxy.getR_d();      // stellar disk scale length in kpc
+  z_d   = galaxy.getZ_d();      // stellar disk scale height in kpc
+  R_vir = galaxy.getR_vir();    // viral radius in kpc
+  c_vir = galaxy.getC_vir();    // halo concentration (to account for adiabatic
+                                // contraction)
+  r_cool = galaxy.getR_cool();  // cooling radius in kpc (MW)
+
+  M_h = M_vir - M_d;    // halo mass in M_sun
+  R_s = R_vir / c_vir;  // halo scale length in kpc
+  // T_d = 5.9406e5; // SET TO MATCH K_EOS SET BY HAND for K_eos = 1.859984e-14
+  // T_d = 2.0e5;
+  T_d       = 1.0e4;  // CHANGED FOR ISOTHERMAL
+  T_h       = 1.0e6;  // halo temperature, at density floor
+  rho_eos   = 1.0e7;  // gas eos normalized at 1e7 Msun/kpc^3
+  rho_eos_h = 3.0e3;  // gas eos normalized at 3e3 Msun/kpc^3 (about n_h = 10^-3.5)
+  mu        = 0.6;
+
+  R_g     = 2.0 * R_d;                            // gas scale length in kpc
+  Sigma_0 = 0.25 * M_d / (2 * M_PI * R_g * R_g);  // central surface density in Msun/kpc^2
+  H_g     = z_d;                                  // initial guess for gas scale height
+  // rho_floor = 1.0e3; //ICs minimum density in Msun/kpc^3
+
+  // EOS info
+  cs   = sqrt(KB * T_d / (mu * MP)) * TIME_UNIT / LENGTH_UNIT;  // sound speed in kpc/kyr
+  cs_h = sqrt(KB * T_h / (mu * MP)) * TIME_UNIT / LENGTH_UNIT;  // sound speed in kpc/kyr
+
+  // set some initial parameters
+  int nhdp  = 21;                                  // number of parameters to pass hydrostatic column
+  Real *hdp = (Real *)calloc(nhdp, sizeof(Real));  // parameters
+  hdp[0]    = M_vir;
+  hdp[1]    = M_d;
+  hdp[2]    = M_h;
+  hdp[3]    = R_vir;
+  hdp[4]    = c_vir;
+  hdp[5]    = R_s;
+  hdp[6]    = R_d;
+  hdp[7]    = z_d;
+  hdp[8]    = T_d;
+  hdp[9]    = Sigma_0;
+  hdp[10]   = R_g;
+  hdp[11]   = H_g;
+  hdp[13]   = p.gamma;
+
+  // determine rho_eos by setting central density of disk
+  // based on central temperature
+  rho_eos = Determine_Rho_EOS_D3D(cs, Sigma_0, hdp);
+
+  // set EOS parameters
+  // K_eos = cs*cs*pow(rho_eos,1.0-p.gamma)/p.gamma; //P = K\rho^gamma
+  K_eos   = cs * cs * rho_eos;  // CHANGED FOR ISOTHERMAL
+  K_eos_h = cs_h * cs_h * pow(rho_eos_h, 1.0 - p.gamma) / p.gamma;
+
+  // Store remaining parameters
   hdp[12] = K_eos;
-  hdp[14] = 0.0; //rho_floor, set to 0
+  hdp[14] = 0.0;  // rho_floor, set to 0
   hdp[15] = rho_eos;
   hdp[16] = cs;
   hdp[17] = K_eos_h;
@@ -843,32 +815,28 @@ void Grid3D::Disk_3D(parameters p)
   hdp[19] = cs_h;
   hdp[20] = r_cool;
 
-
-  //Now we can start the density calculation
-  //we will loop over each column and compute
-  //the density distribution
-  int nz  = p.nz;
-  int nzt = 2*H.n_ghost + nz;
-  Real dz = p.zlen / ((Real) nz);
-  Real *rho = (Real *) calloc(nzt,sizeof(Real));
-
+  // Now we can start the density calculation
+  // we will loop over each column and compute
+  // the density distribution
+  int nz    = p.nz;
+  int nzt   = 2 * H.n_ghost + nz;
+  Real dz   = p.zlen / ((Real)nz);
+  Real *rho = (Real *)calloc(nzt, sizeof(Real));
 
   // create a look up table for the halo gas profile
-  int nr = 1000;
-  Real dr = sqrt(3)*0.5*fmax(p.xlen, p.zlen) / ((Real) nr);
-  Real *rho_halo = (Real *) calloc(nr,sizeof(Real));
-  Real *r_halo = (Real *) calloc(nr,sizeof(Real));
-
+  int nr         = 1000;
+  Real dr        = sqrt(3) * 0.5 * fmax(p.xlen, p.zlen) / ((Real)nr);
+  Real *rho_halo = (Real *)calloc(nr, sizeof(Real));
+  Real *r_halo   = (Real *)calloc(nr, sizeof(Real));
 
   //////////////////////////////////////////////
   //////////////////////////////////////////////
   // Produce a look up table for a hydrostatic hot halo
   //////////////////////////////////////////////
   //////////////////////////////////////////////
-  hydrostatic_ray_analytical_D3D(rho_halo, r_halo, hdp, dr, nr);
+  Hydrostatic_Ray_Analytical_D3D(rho_halo, r_halo, hdp, dr, nr);
   chprintf("Hot halo lookup table generated...\n");
 
-
   //////////////////////////////////////////////
   //////////////////////////////////////////////
   // Add a disk component
@@ -878,45 +846,44 @@ void Grid3D::Disk_3D(parameters p)
   // hydrostatic column for the disk
   // and add the disk density and thermal energy
   // to the density and energy arrays
-  for (j=H.n_ghost; j<H.ny-H.n_ghost; j++) {
-    //chprintf("j %d\n",j);
-    for (i=H.n_ghost; i<H.nx-H.n_ghost; i++) {
-
+  for (j = H.n_ghost; j < H.ny - H.n_ghost; j++) {
+    // chprintf("j %d\n",j);
+    for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) {
       // get the centered x, y, and z positions
       k = H.n_ghost + H.ny;
       Get_Position(i, j, k, &x_pos, &y_pos, &z_pos);
 
-      //cylindrical radius
-      r = sqrt(x_pos*x_pos + y_pos*y_pos);
+      // cylindrical radius
+      r = sqrt(x_pos * x_pos + y_pos * y_pos);
 
+      // Compute the hydrostatic density profile in this z column
+      // owing to the disk
+      // hydrostatic_column_analytical_D3D(rho, r, hdp, dz, nz, H.n_ghost);
+      Hydrostatic_Column_Isothermal_D3D(rho, r, hdp, dz, nz,
+                                        H.n_ghost);  // CHANGED_FOR_ISOTHERMAL
 
-      //Compute the hydrostatic density profile in this z column
-      //owing to the disk
-      //hydrostatic_column_analytical_D3D(rho, r, hdp, dz, nz, H.n_ghost);
-      hydrostatic_column_isothermal_D3D(rho, r, hdp, dz, nz, H.n_ghost); //CHANGED_FOR_ISOTHERMAL
+      // store densities
+      for (k = H.n_ghost; k < H.nz - H.n_ghost; k++) {
+        id = i + j * H.nx + k * H.nx * H.ny;
 
-      //store densities
-      for (k=H.n_ghost; k<H.nz-H.n_ghost; k++) {
-        id = i + j*H.nx + k*H.nx*H.ny;
-
-        //get density from hydrostatic column computation
-        #ifdef MPI_CHOLLA
-        d = rho[nz_local_start + H.n_ghost + (k-H.n_ghost)];
-        #else
-        d = rho[H.n_ghost + (k-H.n_ghost)];
-        #endif
-        //if (d != d || d < 0) printf("Error calculating density. d: %e\n", d);
+  // get density from hydrostatic column computation
+  #ifdef MPI_CHOLLA
+        d = rho[nz_local_start + H.n_ghost + (k - H.n_ghost)];
+  #else
+        d = rho[H.n_ghost + (k - H.n_ghost)];
+  #endif
+        // if (d != d || d < 0) printf("Error calculating density. d: %e\n", d);
 
         // set pressure adiabatically
-        //P = K_eos*pow(d,p.gamma);
+        // P = K_eos*pow(d,p.gamma);
         // set pressure isothermally
-        P = d*cs*cs; // CHANGED FOR ISOTHERMAL
+        P = d * cs * cs;  // CHANGED FOR ISOTHERMAL
 
         // store density in density
-        C.density[id]    = d;
+        C.density[id] = d;
 
         // store internal energy in Energy array
-        C.Energy[id] = P/(gama-1.0);
+        C.Energy[id] = P / (gama - 1.0);
       }
     }
   }
@@ -927,100 +894,110 @@ void Grid3D::Disk_3D(parameters p)
   Real zpm, zpp;
   Real Pm, Pp;
 
-  //pressure gradients for changing
-  //the rotational velocity
+  // pressure gradients for changing
+  // the rotational velocity
   Real dPdx, dPdy, dPdr;
 
-  //compute radial pressure gradients, adjust circular velocities
-  for (k=H.n_ghost; k<H.nz-H.n_ghost; k++) {
-    for (j=H.n_ghost; j<H.ny-H.n_ghost; j++) {
-      for (i=H.n_ghost; i<H.nx-H.n_ghost; i++) {
-
-        id = i + j*H.nx + k*H.nx*H.ny;
+  // compute radial pressure gradients, adjust circular velocities
+  for (k = H.n_ghost; k < H.nz - H.n_ghost; k++) {
+    for (j = H.n_ghost; j < H.ny - H.n_ghost; j++) {
+      for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) {
+        id = i + j * H.nx + k * H.nx * H.ny;
 
-        //get density
+        // get density
         d = C.density[id];
 
-        //restrict to regions where the density
-        //has been set
-        if(d>0.0)
-        {
+        // restrict to regions where the density
+        // has been set
+        if (d > 0.0) {
           // get the centered x, y, and z positions
           Get_Position(i, j, k, &x_pos, &y_pos, &z_pos);
 
-          // calculate radial position and phi (assumes disk is centered at 0, 0)
-          r = sqrt(x_pos*x_pos + y_pos*y_pos);
-          phi = atan2(y_pos, x_pos); // azimuthal angle (in x-y plane)
+          // calculate radial position and phi (assumes disk is centered at 0,
+          // 0)
+          r   = sqrt(x_pos * x_pos + y_pos * y_pos);
+          phi = atan2(y_pos, x_pos);  // azimuthal angle (in x-y plane)
 
           // radial acceleration from disk
-          a_d = fabs(gr_disk_D3D(r, z_pos, hdp));
+          a_d = fabs(Gr_Disk_D3D(r, z_pos, hdp));
           // radial acceleration from halo
-          a_h = fabs(gr_halo_D3D(r, z_pos, hdp));
+          a_h = fabs(Gr_Halo_D3D(r, z_pos, hdp));
 
           //  pressure gradient along x direction
           // gradient calc is first order at boundaries
-          if (i == H.n_ghost) idm = i + j*H.nx + k*H.nx*H.ny;
-          else idm  = (i-1) + j*H.nx + k*H.nx*H.ny;
-          if (i == H.nx-H.n_ghost-1) idp  = i + j*H.nx + k*H.nx*H.ny;
-          else idp  = (i+1) + j*H.nx + k*H.nx*H.ny;
-          Get_Position(i-1, j, k, &xpm, &ypm, &zpm);
-          Get_Position(i+1, j, k, &xpp, &ypp, &zpp);
-          Pm = C.Energy[idm]*(gama-1.0); // only internal energy stored in energy currently
-          Pp = C.Energy[idp]*(gama-1.0); // only internal energy stored in energy currently
-          dPdx =  (Pp-Pm)/(xpp-xpm);
-
-          //pressure gradient along y direction
-          if (j == H.n_ghost) idm = i + j*H.nx + k*H.nx*H.ny;
-          else idm  = i + (j-1)*H.nx + k*H.nx*H.ny;
-          if (j == H.ny-H.n_ghost-1) idp  = i + j*H.nx + k*H.nx*H.ny;
-          else idp  = i + (j+1)*H.nx + k*H.nx*H.ny;
-          Get_Position(i, j-1, k, &xpm, &ypm, &zpm);
-          Get_Position(i, j+1, k, &xpp, &ypp, &zpm);
-          Pm = C.Energy[idm]*(gama-1.0); // only internal energy stored in energy currently
-          Pp = C.Energy[idp]*(gama-1.0); // only internal energy stored in energy currently
-          dPdy =  (Pp-Pm)/(ypp-ypm);
-
-          //radial pressure gradient
-          dPdr = x_pos*dPdx/r + y_pos*dPdy/r;
-
-          //radial acceleration
-          a = a_d + a_h + dPdr/d;
-
-          if(isnan(a)||(a!=a)||(r*a<0))
-          {
-            //printf("i %d j %d k %d a %e a_d %e dPdr %e d %e\n",i,j,k,a,a_d,dPdr,d);
-            //printf("i %d j %d k %d x_pos %e y_pos %e z_pos %e dPdx %e dPdy %e\n",i,j,k,x_pos,y_pos,z_pos,dPdx,dPdy);
-            //printf("i %d j %d k %d Pm %e Pp %e\n",i,j,k,Pm,Pp);
-            //printf("ypp %e ypm %e xpp %e zpm %e r %e\n",ypp,ypm, xpp, xpm ,r);
-            //printf("Energy pm %e pp %e density pm %e pp %e\n",C.Energy[idm],C.Energy[idp],C.density[idm],C.density[idp]);
+          if (i == H.n_ghost) {
+            idm = i + j * H.nx + k * H.nx * H.ny;
+          } else {
+            idm = (i - 1) + j * H.nx + k * H.nx * H.ny;
           }
-          else {
-
+          if (i == H.nx - H.n_ghost - 1) {
+            idp = i + j * H.nx + k * H.nx * H.ny;
+          } else {
+            idp = (i + 1) + j * H.nx + k * H.nx * H.ny;
+          }
+          Get_Position(i - 1, j, k, &xpm, &ypm, &zpm);
+          Get_Position(i + 1, j, k, &xpp, &ypp, &zpp);
+          Pm   = C.Energy[idm] * (gama - 1.0);  // only internal energy stored in energy currently
+          Pp   = C.Energy[idp] * (gama - 1.0);  // only internal energy stored in energy currently
+          dPdx = (Pp - Pm) / (xpp - xpm);
+
+          // pressure gradient along y direction
+          if (j == H.n_ghost) {
+            idm = i + j * H.nx + k * H.nx * H.ny;
+          } else {
+            idm = i + (j - 1) * H.nx + k * H.nx * H.ny;
+          }
+          if (j == H.ny - H.n_ghost - 1) {
+            idp = i + j * H.nx + k * H.nx * H.ny;
+          } else {
+            idp = i + (j + 1) * H.nx + k * H.nx * H.ny;
+          }
+          Get_Position(i, j - 1, k, &xpm, &ypm, &zpm);
+          Get_Position(i, j + 1, k, &xpp, &ypp, &zpm);
+          Pm   = C.Energy[idm] * (gama - 1.0);  // only internal energy stored in energy currently
+          Pp   = C.Energy[idp] * (gama - 1.0);  // only internal energy stored in energy currently
+          dPdy = (Pp - Pm) / (ypp - ypm);
+
+          // radial pressure gradient
+          dPdr = x_pos * dPdx / r + y_pos * dPdy / r;
+
+          // radial acceleration
+          a = a_d + a_h + dPdr / d;
+
+          if (isnan(a) || (a != a) || (r * a < 0)) {
+            // printf("i %d j %d k %d a %e a_d %e dPdr %e d
+            // %e\n",i,j,k,a,a_d,dPdr,d); printf("i %d j %d k %d x_pos %e y_pos
+            // %e z_pos %e dPdx %e dPdy
+            // %e\n",i,j,k,x_pos,y_pos,z_pos,dPdx,dPdy); printf("i %d j %d k %d
+            // Pm %e Pp %e\n",i,j,k,Pm,Pp); printf("ypp %e ypm %e xpp %e zpm %e
+            // r %e\n",ypp,ypm, xpp, xpm ,r); printf("Energy pm %e pp %e density
+            // pm %e pp
+            // %e\n",C.Energy[idm],C.Energy[idp],C.density[idm],C.density[idp]);
+          } else {
             // radial velocity
-            v = sqrt(r*a);
-            vx = -sin(phi)*v;
-            vy = cos(phi)*v;
+            v  = sqrt(r * a);
+            vx = -sin(phi) * v;
+            vy = cos(phi) * v;
             vz = 0;
 
             // set the momenta
-            C.momentum_x[id] = d*vx;
-            C.momentum_y[id] = d*vy;
-            C.momentum_z[id] = d*vz;
-
-            //sheepishly check for NaN's!
+            C.momentum_x[id] = d * vx;
+            C.momentum_y[id] = d * vy;
+            C.momentum_z[id] = d * vz;
 
-            if((d<0)||(P<0)||(isnan(d))||(isnan(P))||(d!=d)||(P!=P))
-              printf("d %e P %e i %d j %d k %d id %d\n",d,P,i,j,k,id);
+            // sheepishly check for NaN's!
 
-            if((isnan(vx))||(isnan(vy))||(isnan(vz))||(vx!=vx)||(vy!=vy)||(vz!=vz)) {
-              printf("vx %e vy %e vz %e i %d j %d k %d id %d\n",vx,vy,vz,i,j,k,id);
+            if ((d < 0) || (P < 0) || (isnan(d)) || (isnan(P)) || (d != d) || (P != P)) {
+              printf("d %e P %e i %d j %d k %d id %d\n", d, P, i, j, k, id);
             }
-            else {
-            //if the density is negative, there
-            //is a bigger problem!
-              if(d<0)
-              {
-                printf("pid %d error negative density i %d j %d k %d d %e\n",-1,i,j,k,d);
+
+            if ((isnan(vx)) || (isnan(vy)) || (isnan(vz)) || (vx != vx) || (vy != vy) || (vz != vz)) {
+              printf("vx %e vy %e vz %e i %d j %d k %d id %d\n", vx, vy, vz, i, j, k, id);
+            } else {
+              // if the density is negative, there
+              // is a bigger problem!
+              if (d < 0) {
+                printf("pid %d error negative density i %d j %d k %d d %e\n", -1, i, j, k, d);
               }
             }
           }
@@ -1034,31 +1011,30 @@ void Grid3D::Disk_3D(parameters p)
   // Add a hot, hydrostatic halo
   //////////////////////////////////////////////
   //////////////////////////////////////////////
-  for (k=H.n_ghost; k<H.nz-H.n_ghost; k++) {
-    for (j=H.n_ghost; j<H.ny-H.n_ghost; j++) {
-      for (i=H.n_ghost; i<H.nx-H.n_ghost; i++) {
-
+  for (k = H.n_ghost; k < H.nz - H.n_ghost; k++) {
+    for (j = H.n_ghost; j < H.ny - H.n_ghost; j++) {
+      for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) {
         // get the cell index
-        id = i + j*H.nx + k*H.nx*H.ny;
+        id = i + j * H.nx + k * H.nx * H.ny;
 
         // get the centered x, y, and z positions
         Get_Position(i, j, k, &x_pos, &y_pos, &z_pos);
 
-        // calculate 3D radial position and phi (assumes halo is centered at 0, 0)
-        r = sqrt(x_pos*x_pos + y_pos*y_pos + z_pos*z_pos);
+        // calculate 3D radial position and phi (assumes halo is centered at 0,
+        // 0)
+        r = sqrt(x_pos * x_pos + y_pos * y_pos + z_pos * z_pos);
 
         // interpolate the density at this position
-        d = halo_density_D3D(r, r_halo, rho_halo, dr, nr);
+        d = Halo_Density_D3D(r, r_halo, rho_halo, dr, nr);
 
         // set pressure adiabatically
-        P = K_eos_h*pow(d,p.gamma);
+        P = K_eos_h * pow(d, p.gamma);
 
         // store density in density
         C.density[id] += d;
 
         // store internal energy in Energy array
-        C.Energy[id] += P/(gama-1.0);
-
+        C.Energy[id] += P / (gama - 1.0);
       }
     }
   }
@@ -1069,36 +1045,34 @@ void Grid3D::Disk_3D(parameters p)
   //////////////////////////////////////////////
   //////////////////////////////////////////////
 
-  for (k=H.n_ghost; k<H.nz-H.n_ghost; k++) {
-    for (j=H.n_ghost; j<H.ny-H.n_ghost; j++) {
-      for (i=H.n_ghost; i<H.nx-H.n_ghost; i++) {
+  for (k = H.n_ghost; k < H.nz - H.n_ghost; k++) {
+    for (j = H.n_ghost; j < H.ny - H.n_ghost; j++) {
+      for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) {
+        id = i + j * H.nx + k * H.nx * H.ny;
 
-        id = i + j*H.nx + k*H.nx*H.ny;
-
-        // set internal energy
-        #ifdef DE
+  // set internal energy
+  #ifdef DE
         C.GasEnergy[id] = C.Energy[id];
-        #endif
+  #endif
 
         // add kinetic contribution to total energy
-        C.Energy[id] += 0.5*(C.momentum_x[id]*C.momentum_x[id] + C.momentum_y[id]*C.momentum_y[id] + C.momentum_z[id]*C.momentum_z[id])/C.density[id];
-
+        C.Energy[id] += 0.5 *
+                        (C.momentum_x[id] * C.momentum_x[id] + C.momentum_y[id] * C.momentum_y[id] +
+                         C.momentum_z[id] * C.momentum_z[id]) /
+                        C.density[id];
       }
     }
   }
 
-  //free density profile
+  // free density profile
   free(rho);
   free(hdp);
 
-  //free the arrays
-  //for the hot halo
-  //gas lookup table
+  // free the arrays
+  // for the hot halo
+  // gas lookup table
   free(r_halo);
   free(rho_halo);
 
-  #endif //DISK_ICS
-
+#endif  // DISK_ICS
 }
-
-
diff --git a/src/model/disk_galaxy.h b/src/model/disk_galaxy.h
index d21b7ed98..9f58bf2a6 100644
--- a/src/model/disk_galaxy.h
+++ b/src/model/disk_galaxy.h
@@ -1,155 +1,191 @@
 #ifndef DISK_GALAXY
 #define DISK_GALAXY
 
+#define SIMULATED_FRACTION 0.1
+
 #include <cmath>
+#include <iostream>
+#include <random>
+
 #include "../global/global.h"
 
-class DiskGalaxy {
-
-private:
-    Real M_vir, M_d, R_d, Z_d, R_vir, c_vir, r_cool, M_h, R_h;
-    Real log_func(Real y) {
-        return log(1+y) - y/(1+y);
-    };
-
-
-public:
-    DiskGalaxy(Real md, Real rd, Real zd, Real mvir, Real rvir, Real cvir, Real rcool) {
-        M_d = md;
-        R_d = rd;
-        Z_d = zd;
-        M_vir = mvir;
-        R_vir = rvir;
-        c_vir = cvir;
-        r_cool = rcool;
-        M_h = M_vir - M_d;
-        R_h = R_vir / c_vir;
-    };
-
-
-    /**
-     *     Radial acceleration in miyamoto nagai
-     */
-    Real gr_disk_D3D(Real R, Real z) {
-        Real A = R_d + sqrt(Z_d*Z_d + z*z);
-        Real B = pow(A*A + R*R, 1.5);
-
-        return -GN*M_d*R/B;
-    };
-
-
-    /**
-     *     Radial acceleration in NFW halo
-     */
-    Real gr_halo_D3D(Real R, Real z){
-        Real r = sqrt(R*R + z*z); //spherical radius
-        Real x = r / R_h;
-        Real r_comp = R/r;
-
-        Real A = log_func(x);
-        Real B = 1.0 / (r*r);
-        Real C = GN*M_h/log_func(c_vir);
-
-        return -C*A*B*r_comp;
-    };
-
-
-    /**
-     * Convenience method that returns the combined radial acceleration
-     * of a disk galaxy at a specified point.
-     * @param R the cylindrical radius at the desired point
-     * @param z the distance perpendicular to the plane of the disk of the desired point
-     * @return
-     */
-    Real gr_total_D3D(Real R, Real z) {
-        return gr_disk_D3D(R, z) + gr_halo_D3D(R, z);
-    };
-
-
-    /**
-     *    Potential of NFW halo
-     */
-    Real phi_halo_D3D(Real R, Real z) {
-        Real r = sqrt(R * R + z * z); //spherical radius
-        Real x = r / R_h;
-        Real C = GN * M_h / (R_h * log_func(c_vir));
-
-        //limit x to non-zero value
-        if (x < 1.0e-9) x = 1.0e-9;
-
-        return -C * log(1 + x) / x;
-    };
-
-
-    /**
-     *  Miyamoto-Nagai potential
-     */
-    Real phi_disk_D3D(Real R, Real z) {
-        Real A = sqrt(z*z + Z_d*Z_d);
-        Real B = R_d + A;
-        Real C = sqrt(R*R + B*B);
-
-        //patel et al. 2017, eqn 2
-        return -GN * M_d / C;
-    };
-
-    Real rho_disk_D3D(const Real r, const Real z) {
-      const Real a = R_d;
-      const Real c = Z_d;
-      const Real b = sqrt(z*z+c*c);
-      const Real d = a+b;
-      const Real s = r*r+d*d;
-      return M_d*c*c*(a*(d*d+r*r)+3.0*b*d*d)/(4.0*M_PI*b*b*b*pow(s,2.5));
+class DiskGalaxy
+{
+ private:
+  Real M_vir, M_d, R_d, Z_d, R_vir, c_vir, r_cool, M_h, R_h;
+  Real log_func(Real y) { return log(1 + y) - y / (1 + y); };
+
+ public:
+  DiskGalaxy(Real md, Real rd, Real zd, Real mvir, Real rvir, Real cvir, Real rcool)
+  {
+    M_d    = md;
+    R_d    = rd;
+    Z_d    = zd;
+    M_vir  = mvir;
+    R_vir  = rvir;
+    c_vir  = cvir;
+    r_cool = rcool;
+    M_h    = M_vir - M_d;
+    R_h    = R_vir / c_vir;
+  };
+
+  /**
+   *     Radial acceleration in miyamoto nagai
+   */
+  Real gr_disk_D3D(Real R, Real z)
+  {
+    Real A = R_d + sqrt(Z_d * Z_d + z * z);
+    Real B = pow(A * A + R * R, 1.5);
+
+    return -GN * M_d * R / B;
+  };
+
+  /**
+   *     Radial acceleration in NFW halo
+   */
+  Real gr_halo_D3D(Real R, Real z)
+  {
+    Real r      = sqrt(R * R + z * z);  // spherical radius
+    Real x      = r / R_h;
+    Real r_comp = R / r;
+
+    Real A = log_func(x);
+    Real B = 1.0 / (r * r);
+    Real C = GN * M_h / log_func(c_vir);
+
+    return -C * A * B * r_comp;
+  };
+
+  /**
+   * Convenience method that returns the combined radial acceleration
+   * of a disk galaxy at a specified point.
+   * @param R the cylindrical radius at the desired point
+   * @param z the distance perpendicular to the plane of the disk of the desired
+   * point
+   * @return
+   */
+  Real gr_total_D3D(Real R, Real z) { return gr_disk_D3D(R, z) + gr_halo_D3D(R, z); };
+
+  /**
+   *    Potential of NFW halo
+   */
+  Real phi_halo_D3D(Real R, Real z)
+  {
+    Real r = sqrt(R * R + z * z);  // spherical radius
+    Real x = r / R_h;
+    Real C = GN * M_h / (R_h * log_func(c_vir));
+
+    // limit x to non-zero value
+    if (x < 1.0e-9) {
+      x = 1.0e-9;
     }
 
-    /**
-     *  Convenience method that returns the combined gravitational potential
-     *  of the disk and halo.
-     */
-    Real phi_total_D3D(Real R, Real z) {
-      return phi_halo_D3D(R, z) + phi_disk_D3D(R, z);
-    };
-
-
-    /**
-     * epicyclic frequency
-     */
-    Real kappa2(Real R, Real z) {
-      Real r = sqrt(R*R + z*z);
-      Real x = r/R_h;
-      Real C = GN * M_h / (R_h * log_func(c_vir));
-      Real A = R_d + sqrt(z*z + Z_d*Z_d);
-      Real B = sqrt(R*R + A*A);
-
-      Real phiH_prime = -C*R/(r*r)/(1 + x) + C*log(1+x)*R_h*R/(r*r*r) + GN*M_d*R/(B*B*B);
-      Real phiH_prime_prime = -C/(r*r)/(1+x) + 2*C*R*R/(r*r*r*r)/(1+x) + C/((1+x)*(1+x))*R*R/R_h/(r*r*r) +
-               C*R*R/(1+x)/(r*r*r*r) + C*log(1+x)*R_h/(r*r*r)*(1 - 3*R*R/(r*r)) +
-               GN*M_d/(B*B*B)*(1 - 3*R*R/(B*B));
-
-      return 3/R*phiH_prime + phiH_prime_prime;
-    };
-
-
-    Real surface_density(Real R) {
-        return M_d/(2*M_PI)/(R_d*R_d)*exp(-R/R_d);
-    };
-
-    Real sigma_crit(Real R) {
-        return 3.36*GN*surface_density(R)/sqrt(kappa2(R,0.0));
-    };
-
-
-    Real getM_d() const { return M_d; };
-    Real getR_d() const { return R_d; };
-    Real getZ_d() const { return Z_d; };
-
+    return -C * log(1 + x) / x;
+  };
+
+  /**
+   *  Miyamoto-Nagai potential
+   */
+  Real phi_disk_D3D(Real R, Real z)
+  {
+    Real A = sqrt(z * z + Z_d * Z_d);
+    Real B = R_d + A;
+    Real C = sqrt(R * R + B * B);
+
+    // patel et al. 2017, eqn 2
+    return -GN * M_d / C;
+  };
+
+  Real rho_disk_D3D(const Real r, const Real z)
+  {
+    const Real a = R_d;
+    const Real c = Z_d;
+    const Real b = sqrt(z * z + c * c);
+    const Real d = a + b;
+    const Real s = r * r + d * d;
+    return M_d * c * c * (a * (d * d + r * r) + 3.0 * b * d * d) / (4.0 * M_PI * b * b * b * pow(s, 2.5));
+  }
+
+  /**
+   *  Convenience method that returns the combined gravitational potential
+   *  of the disk and halo.
+   */
+  Real phi_total_D3D(Real R, Real z) { return phi_halo_D3D(R, z) + phi_disk_D3D(R, z); };
+
+  /**
+   * epicyclic frequency
+   */
+  Real kappa2(Real R, Real z)
+  {
+    Real r = sqrt(R * R + z * z);
+    Real x = r / R_h;
+    Real C = GN * M_h / (R_h * log_func(c_vir));
+    Real A = R_d + sqrt(z * z + Z_d * Z_d);
+    Real B = sqrt(R * R + A * A);
+
+    Real phiH_prime = -C * R / (r * r) / (1 + x) + C * log(1 + x) * R_h * R / (r * r * r) + GN * M_d * R / (B * B * B);
+    Real phiH_prime_prime = -C / (r * r) / (1 + x) + 2 * C * R * R / (r * r * r * r) / (1 + x) +
+                            C / ((1 + x) * (1 + x)) * R * R / R_h / (r * r * r) +
+                            C * R * R / (1 + x) / (r * r * r * r) +
+                            C * log(1 + x) * R_h / (r * r * r) * (1 - 3 * R * R / (r * r)) +
+                            GN * M_d / (B * B * B) * (1 - 3 * R * R / (B * B));
+
+    return 3 / R * phiH_prime + phiH_prime_prime;
+  };
+
+  Real surface_density(Real R) { return M_d / (2 * M_PI) / (R_d * R_d) * exp(-R / R_d); };
+
+  Real sigma_crit(Real R) { return 3.36 * GN * surface_density(R) / sqrt(kappa2(R, 0.0)); };
+
+  Real getM_d() const { return M_d; };
+  Real getR_d() const { return R_d; };
+  Real getZ_d() const { return Z_d; };
+  Real getM_vir() const { return M_vir; };
+  Real getR_vir() const { return R_vir; };
+  Real getC_vir() const { return c_vir; };
+  Real getR_cool() const { return r_cool; };
 };
 
-namespace Galaxies {
-    // all masses in M_sun and all distances in kpc
-    //static DiskGalaxy MW(6.5e10, 3.5, (3.5/5.0), 1.0e12, 261, 20, 157.0);
-    static DiskGalaxy MW(6.5e10, 2.7, 0.7, 1.077e12, 261, 18, 157.0);
-    static DiskGalaxy M82(1.0e10, 0.8, 0.15, 5.0e10, 0.8/0.015, 10, 100.0);
+class ClusteredDiskGalaxy : public DiskGalaxy
+{
+ private:
+  Real lower_cluster_mass, higher_cluster_mass;
+  Real normalization;
+
+ public:
+  ClusteredDiskGalaxy(Real lm, Real hm, Real md, Real rd, Real zd, Real mvir, Real rvir, Real cvir, Real rcool)
+      : DiskGalaxy{md, rd, zd, mvir, rvir, cvir, rcool}, lower_cluster_mass{lm}, higher_cluster_mass{hm}
+  {
+    // if (lower_cluster_mass >= higher_cluster_mass)
+    normalization = 1 / log(higher_cluster_mass / lower_cluster_mass);
+  };
+
+  Real getLowerClusterMass() const { return lower_cluster_mass; }
+  Real getHigherClusterMass() const { return higher_cluster_mass; }
+  Real getNormalization() const { return normalization; }
+
+  std::vector<Real> generateClusterPopulationMasses(int N, std::mt19937_64 generator)
+  {
+    std::vector<Real> population(N);
+    for (int i = 0; i < N; i++) {
+      population[singleClusterMass(generator)];
+    }
+    return population;
+  }
+
+  Real singleClusterMass(std::mt19937_64 generator)
+  {
+    std::uniform_real_distribution<Real> uniform_distro(0, 1);
+    return lower_cluster_mass * exp(uniform_distro(generator) / normalization);
+  }
 };
 
-#endif //DISK_GALAXY
+namespace galaxies
+{
+// all masses in M_sun and all distances in kpc
+// static DiskGalaxy MW(6.5e10, 3.5, (3.5/5.0), 1.0e12, 261, 20, 157.0);
+static ClusteredDiskGalaxy MW(1e4, 5e5, 6.5e10, 2.7, 0.7, 1.077e12, 261, 18, 157.0);
+static DiskGalaxy M82(1.0e10, 0.8, 0.15, 5.0e10, 0.8 / 0.015, 10, 100.0);
+};  // namespace galaxies
+
+#endif  // DISK_GALAXY
diff --git a/src/mpi/MPI_Comm_node.c b/src/mpi/MPI_Comm_node.c
deleted file mode 100644
index 057233d8c..000000000
--- a/src/mpi/MPI_Comm_node.c
+++ /dev/null
@@ -1,67 +0,0 @@
-#ifdef MPI_CHOLLA
-#include <mpi.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include "../mpi/MPI_Comm_node.h"
-
-
-/*! \fn int djb2_hash(char *str)
- *  \brief Simple hash function by Dan Bernstein */
-int djb2_hash(char *str);
-
-/*! \fn MPI_Comm MPI_Comm_node(void)
- *  \brief Returns an MPI_Comm for processes on each node.*/
-MPI_Comm MPI_Comm_node(int *myid_node, int *nproc_node)
-{
-  int  myid;				    //global rank
-  int  nproc;				    //global rank
-  char pname[MPI_MAX_PROCESSOR_NAME];	    //node hostname
-  int  pname_length;			    //length of node hostname
-  int hash;				    //hash of node hostname
-
-  MPI_Comm node_comm;			    //communicator for the procs on each node
-
-  //get the global process rank
-  MPI_Comm_rank(MPI_COMM_WORLD,&myid);
-  MPI_Comm_size(MPI_COMM_WORLD,&nproc);
-
-
-  //if we're the only process, then just return
-  //the global rank, size, and comm
-  if(nproc==1)
-  {
-    *myid_node  = myid;
-    *nproc_node = nproc;
-    return MPI_COMM_WORLD;
-  }
-
-  //get the hostname of the node
-  MPI_Get_processor_name(pname, &pname_length);
-
-  //hash the name of the node
-  hash = abs(djb2_hash(pname));
-
-  //printf("hash %d\n",hash);
-
-  //split the communicator
-  MPI_Comm_split(MPI_COMM_WORLD, hash, myid, &node_comm);
-
-  //get size and rank
-  MPI_Comm_rank(node_comm,myid_node);
-  MPI_Comm_size(node_comm,nproc_node);
-
-  //return the communicator for processors on the node
-  return node_comm;
-}
-
-/*! \fn int djb2_hash(char *str)
- *  \brief Simple hash function by Dan Bernstein */
-int djb2_hash(char *str)
-{
-  int hash = 5381;
-  int c;
-  while((c = *str++))
-    hash = ((hash<<5) + hash) + c; /*hash*33 + c*/
-  return hash;
-}
-#endif /*MPI_CHOLLA*/
diff --git a/src/mpi/MPI_Comm_node.h b/src/mpi/MPI_Comm_node.h
deleted file mode 100644
index 0d8820d02..000000000
--- a/src/mpi/MPI_Comm_node.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef MPI_COMM_NODE
-#define MPI_COMM_NODE
-
-#include <mpi.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif //__cplusplus
-MPI_Comm MPI_Comm_node(int *pid, int *np);
-#ifdef __cplusplus
-}
-#endif //__cplusplus
-
-#endif //MPI_COMM_NODE
diff --git a/src/mpi/cuda_mpi_routines.cu b/src/mpi/cuda_mpi_routines.cu
index 3b2e65e51..987d5fe67 100644
--- a/src/mpi/cuda_mpi_routines.cu
+++ b/src/mpi/cuda_mpi_routines.cu
@@ -1,8 +1,9 @@
 #ifdef MPI_CHOLLA
-#include <stdio.h>
-#include "../utils/gpu.hpp"
-#include "../io/io.h"
-#include "../mpi/cuda_mpi_routines.h"
+  #include <stdio.h>
+
+  #include "../io/io.h"
+  #include "../mpi/cuda_mpi_routines.h"
+  #include "../utils/gpu.hpp"
 
 // #define PRINT_DEVICE_IDS
 
@@ -10,43 +11,52 @@
  *  \brief CUDA initialization within MPI. */
 int initialize_cuda_mpi(int myid, int nprocs)
 {
-  int i_device = 0;   //GPU device for this process
-  int n_device;   //number of GPU devices available
+  int i_device = 0;  // GPU device for this process
+  int n_device;      // number of GPU devices available
 
   cudaError_t flag_error;
 
-  //get the number of cuda devices
+  // get the number of cuda devices
   flag_error = cudaGetDeviceCount(&n_device);
 
-  //check for errors
-  if(flag_error!=cudaSuccess)
-  {
-    if(flag_error==cudaErrorNoDevice)
-      fprintf(stderr,"cudaGetDeviceCount: Error! for myid = %d and n_device = %d; cudaErrorNoDevice\n",myid,n_device);
-    if(flag_error==cudaErrorInsufficientDriver)
-      fprintf(stderr,"cudaGetDeviceCount: Error! for myid = %d and n_device = %d; cudaErrorInsufficientDriver\n",myid,n_device);
+  // check for errors
+  if (flag_error != cudaSuccess) {
+    if (flag_error == cudaErrorNoDevice) {
+      fprintf(stderr,
+              "cudaGetDeviceCount: Error! for myid = %d and n_device = %d; "
+              "cudaErrorNoDevice\n",
+              myid, n_device);
+    }
+    if (flag_error == cudaErrorInsufficientDriver) {
+      fprintf(stderr,
+              "cudaGetDeviceCount: Error! for myid = %d and n_device = %d; "
+              "cudaErrorInsufficientDriver\n",
+              myid, n_device);
+    }
     fflush(stderr);
     return 1;
   }
 
-  //get host name
-  char pname[MPI_MAX_PROCESSOR_NAME];     //node hostname
-  int  pname_length;          //length of node hostname
+  // get host name
+  char pname[MPI_MAX_PROCESSOR_NAME];  // node hostname
+  int pname_length;                    // length of node hostname
   MPI_Get_processor_name(pname, &pname_length);
 
-  //set a cuda device for each process
-  cudaSetDevice(myid%n_device);
+  // set a cuda device for each process
+  cudaSetDevice(myid % n_device);
 
-  //double check
+  // double check
   cudaGetDevice(&i_device);
 
   #ifdef PRINT_DEVICE_IDS
-  printf("In initialize_cuda_mpi: name:%s myid = %d, i_device = %d, n_device = %d\n",pname, myid,i_device,n_device);
+  printf(
+      "In initialize_cuda_mpi: name:%s myid = %d, i_device = %d, n_device = "
+      "%d\n",
+      pname, myid, i_device, n_device);
   fflush(stdout);
   MPI_Barrier(world);
   #endif
 
   return 0;
-
 }
-#endif //MPI_CHOLLA
+#endif  // MPI_CHOLLA
diff --git a/src/mpi/cuda_mpi_routines.h b/src/mpi/cuda_mpi_routines.h
index d408afae6..31b1f89a0 100644
--- a/src/mpi/cuda_mpi_routines.h
+++ b/src/mpi/cuda_mpi_routines.h
@@ -3,7 +3,7 @@
 
 #ifdef __cplusplus
 extern "C" {
-#endif //__cplusplus
+#endif  //__cplusplus
 
 /*! \fn int initialize_cuda_mpi(int myid, int nprocs);
  *  \brief CUDA initialization within MPI. */
@@ -11,7 +11,6 @@ int initialize_cuda_mpi(int myid, int nprocs);
 
 #ifdef __cplusplus
 }
-#endif //__cplusplus
+#endif  //__cplusplus
 
-
-#endif //CUDA_MPI_ROUTINES
+#endif  // CUDA_MPI_ROUTINES
diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp
index 6d51ee643..8f6c533e0 100644
--- a/src/mpi/mpi_routines.cpp
+++ b/src/mpi/mpi_routines.cpp
@@ -1,40 +1,40 @@
 #ifdef MPI_CHOLLA
-#include <mpi.h>
-#include <math.h>
-#include "../mpi/mpi_routines.h"
-#include "../global/global.h"
-#include "../utils/error_handling.h"
-#include "../io/io.h"
-#include "../mpi/cuda_mpi_routines.h"
-#include "../mpi/MPI_Comm_node.h"
-#include <iostream>
+  #include "../mpi/mpi_routines.h"
+
+  #include <math.h>
+  #include <mpi.h>
+
+  #include <iostream>
+  #include <tuple>
+
+  #include "../global/global.h"
+  #include "../io/io.h"
+  #include "../mpi/cuda_mpi_routines.h"
+  #include "../utils/error_handling.h"
 
 /*Global MPI Variables*/
-int procID; /*process rank*/
-int nproc;  /*number of processes in global comm*/
-int root;   /*rank of root process*/
+// note: some relevant global variables are declared in global.h
 
 int procID_node; /*process rank on node*/
 int nproc_node;  /*number of MPI processes on node*/
 
 MPI_Comm world; /*global communicator*/
-MPI_Comm node;  /*global communicator*/
 
 MPI_Datatype MPI_CHREAL; /*set equal to MPI_FLOAT or MPI_DOUBLE*/
 
-#ifdef PARTICLES
+  #ifdef PARTICLES
 MPI_Datatype MPI_PART_INT; /*set equal to MPI_INT or MPI_LONG*/
-#endif
+  #endif
 
-//MPI_Requests for nonblocking comm
+// MPI_Requests for nonblocking comm
 MPI_Request *send_request;
 MPI_Request *recv_request;
 
-//MPI destinations and sources
+// MPI destinations and sources
 int dest[6];
 int source[6];
 
-//Communication buffers
+// Communication buffers
 
 // For BLOCK
 Real *d_send_buffer_x0;
@@ -69,8 +69,8 @@ int x_buffer_length;
 int y_buffer_length;
 int z_buffer_length;
 
-#ifdef PARTICLES
-//Buffers for particles transfers
+  #ifdef PARTICLES
+// Buffers for particles transfers
 Real *d_send_buffer_x0_particles;
 Real *d_send_buffer_x1_particles;
 Real *d_send_buffer_y0_particles;
@@ -84,7 +84,7 @@ Real *d_recv_buffer_y1_particles;
 Real *d_recv_buffer_z0_particles;
 Real *d_recv_buffer_z1_particles;
 
-//Buffers for particles transfers
+// Buffers for particles transfers
 Real *h_send_buffer_x0_particles;
 Real *h_send_buffer_x1_particles;
 Real *h_send_buffer_y0_particles;
@@ -118,7 +118,7 @@ MPI_Request *recv_request_n_particles;
 // Request for Particles Transfer
 MPI_Request *send_request_particles_transfer;
 MPI_Request *recv_request_particles_transfer;
-#endif//PARTICLES
+  #endif  // PARTICLES
 
 /*local domain sizes*/
 /*none of these include ghost cells!*/
@@ -137,16 +137,14 @@ int nproc_x;
 int nproc_y;
 int nproc_z;
 
-#ifdef   FFTW
+  #ifdef FFTW
 ptrdiff_t n_local_complex;
-#endif /*FFTW*/
-
+  #endif /*FFTW*/
 
 /*\fn void InitializeChollaMPI(void) */
 /* Routine to initialize MPI */
 void InitializeChollaMPI(int *pargc, char **pargv[])
 {
-
   /*initialize MPI*/
   MPI_Init(pargc, pargv);
 
@@ -157,7 +155,7 @@ void InitializeChollaMPI(int *pargc, char **pargv[])
   MPI_Comm_size(MPI_COMM_WORLD, &nproc);
 
   /*print a cute message*/
-  //printf("Processor %d of %d: Hello!\n", procID, nproc);
+  // printf("Processor %d of %d: Hello!\n", procID, nproc);
 
   /* set the root process rank */
   root = 0;
@@ -174,108 +172,111 @@ void InitializeChollaMPI(int *pargc, char **pargv[])
   #endif /*PRECISION*/
 
   #ifdef PARTICLES
-  #ifdef PARTICLES_LONG_INTS
+    #ifdef PARTICLES_LONG_INTS
   MPI_PART_INT = MPI_LONG;
-  #else
+    #else
   MPI_PART_INT = MPI_INT;
-  #endif
+    #endif
   #endif
 
-  /*create the MPI_Request arrays for non-blocking sends*/
-  if(!(send_request = (MPI_Request *) malloc(2*sizeof(MPI_Request))))
-  {
+  /*create the MPI_Request arrays for non-blocking sends. If the malloc fails then print an error and exit*/
+  send_request = (MPI_Request *)malloc(2 * sizeof(MPI_Request));
+  if (!send_request) {
     chprintf("Error allocating send_request.\n");
     chexit(-2);
   }
-  if(!(recv_request = (MPI_Request *) malloc(2*sizeof(MPI_Request))))
-  {
+  recv_request = (MPI_Request *)malloc(2 * sizeof(MPI_Request));
+  if (!recv_request) {
     chprintf("Error allocating recv_request.\n");
     chexit(-2);
   }
 
   #ifdef PARTICLES
-  if(!(send_request_n_particles = (MPI_Request *) malloc(2*sizeof(MPI_Request))))
-  {
-    chprintf("Error allocating send_request for number of particles for transfer.\n");
+  send_request_n_particles = (MPI_Request *)malloc(2 * sizeof(MPI_Request));
+  if (!send_request_n_particles) {
+    chprintf(
+        "Error allocating send_request for number of particles for "
+        "transfer.\n");
     chexit(-2);
   }
-  if(!(recv_request_n_particles = (MPI_Request *) malloc(2*sizeof(MPI_Request))))
-  {
-    chprintf("Error allocating recv_request for number of particles for transfer.\n");
+  recv_request_n_particles = (MPI_Request *)malloc(2 * sizeof(MPI_Request));
+  if (!recv_request_n_particles) {
+    chprintf(
+        "Error allocating recv_request for number of particles for "
+        "transfer.\n");
     chexit(-2);
   }
-
-  if(!(send_request_particles_transfer = (MPI_Request *) malloc(2*sizeof(MPI_Request))))
-  {
+  send_request_particles_transfer = (MPI_Request *)malloc(2 * sizeof(MPI_Request));
+  if (!send_request_particles_transfer) {
     chprintf("Error allocating send_request for particles transfer.\n");
     chexit(-2);
   }
-  if(!(recv_request_particles_transfer = (MPI_Request *) malloc(2*sizeof(MPI_Request))))
-  {
+  recv_request_particles_transfer = (MPI_Request *)malloc(2 * sizeof(MPI_Request));
+  if (!recv_request_particles_transfer) {
     chprintf("Error allocating recv_request for particles transfer.\n");
     chexit(-2);
   }
   #endif
 
   /*set up node communicator*/
-  node = MPI_Comm_node(&procID_node, &nproc_node);
+  std::tie(procID_node, nproc_node) = MPI_Comm_node();
+
   // #ifdef ONLY_PARTICLES
   // chprintf("ONLY_PARTICLES: Initializing without CUDA support.\n");
   // #else
   // #ifndef GRAVITY
   // // Needed to initialize cuda after gravity in order to work on Summit
   // //initialize cuda for use with mpi
-  #ifdef CUDA
-  if(initialize_cuda_mpi(procID_node,nproc_node))
-  {
+
+  if (initialize_cuda_mpi(procID_node, nproc_node)) {
     chprintf("Error initializing cuda with mpi.\n");
     chexit(-10);
   }
-  #endif /*CUDA*/
   // #endif//ONLY_PARTICLES
-
 }
 
-
-
 /* Perform domain decomposition */
-void DomainDecomposition(struct parameters *P, struct Header *H, int nx_gin, int ny_gin, int nz_gin)
+void DomainDecomposition(struct Parameters *P, struct Header *H, int nx_gin, int ny_gin, int nz_gin)
 {
-
   DomainDecompositionBLOCK(P, H, nx_gin, ny_gin, nz_gin);
 
   // set grid dimensions
-  H->nx = nx_local+2*H->n_ghost;
+  H->nx      = nx_local + 2 * H->n_ghost;
   H->nx_real = nx_local;
-  if (ny_local == 1) H->ny = 1;
-  else H->ny = ny_local+2*H->n_ghost;
+  if (ny_local == 1) {
+    H->ny = 1;
+  } else {
+    H->ny = ny_local + 2 * H->n_ghost;
+  }
   H->ny_real = ny_local;
-  if (nz_local == 1) H->nz = 1;
-  else H->nz = nz_local+2*H->n_ghost;
+  if (nz_local == 1) {
+    H->nz = 1;
+  } else {
+    H->nz = nz_local + 2 * H->n_ghost;
+  }
   H->nz_real = nz_local;
 
   // set total number of cells
   H->n_cells = H->nx * H->ny * H->nz;
 
-  //printf("In DomainDecomposition: nx %d ny %d nz %d nc %d\n",H->nx,H->ny,H->nz,H->n_cells);
+  // printf("In DomainDecomposition: nx %d ny %d nz %d nc
+  // %d\n",H->nx,H->ny,H->nz,H->n_cells);
 
-  //Allocate communication buffers
+  // Allocate communication buffers
   Allocate_MPI_DeviceBuffers(H);
-
 }
 
 /* Perform domain decomposition */
-void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin, int ny_gin, int nz_gin)
+void DomainDecompositionBLOCK(struct Parameters *P, struct Header *H, int nx_gin, int ny_gin, int nz_gin)
 {
   int n;
-  int i,j,k;
+  int i, j, k;
   int *ix;
   int *iy;
   int *iz;
 
-  //enforce an even number of processes
-  if(nproc%2 && nproc>1)
-  {
+  // enforce an even number of processes
+  if (nproc % 2 && nproc > 1) {
     chprintf("WARNING: Odd number of processors > 1 is not officially supported\n");
   }
 
@@ -285,19 +286,19 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin
   nz_global = nz_gin;
 
   /*allocate subdomain indices*/
-  ix = (int *)malloc(nproc*sizeof(int));
-  iy = (int *)malloc(nproc*sizeof(int));
-  iz = (int *)malloc(nproc*sizeof(int));
+  ix = (int *)malloc(nproc * sizeof(int));
+  iy = (int *)malloc(nproc * sizeof(int));
+  iz = (int *)malloc(nproc * sizeof(int));
 
   /*tile the MPI processes in blocks*/
   /*this sets nproc_x, nproc_y, nproc_z */
-  //chprintf("About to enter tiling block decomp\n");
+  // chprintf("About to enter tiling block decomp\n");
   MPI_Barrier(world);
   TileBlockDecomposition();
 
   if (nz_global > nx_global) {
     int tmp;
-    tmp = nproc_x;
+    tmp     = nproc_x;
     nproc_x = nproc_z;
     nproc_z = tmp;
   }
@@ -308,118 +309,118 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin
   nproc_y = P->n_proc_y;
   nproc_z = P->n_proc_z;
   chprintf("Setting MPI grid: nx=%d  ny=%d  nz=%d\n", nproc_x, nproc_y, nproc_z);
-  // chprintf("Setting MPI grid: nx=%d  ny=%d  nz=%d\n", P->n_proc_x, P->n_proc_y, P->n_proc_z);
+  // chprintf("Setting MPI grid: nx=%d  ny=%d  nz=%d\n", P->n_proc_x,
+  // P->n_proc_y, P->n_proc_z);
   #endif
 
-  //chprintf("Allocating tiling.\n");
+  // chprintf("Allocating tiling.\n");
   MPI_Barrier(world);
-  int ***tiling = three_dimensional_int_array(nproc_x,nproc_y,nproc_z);
+  int ***tiling = three_dimensional_int_array(nproc_x, nproc_y, nproc_z);
 
-
-  //find indices
-  //chprintf("Setting indices.\n");
+  // find indices
+  // chprintf("Setting indices.\n");
   MPI_Barrier(world);
   n = 0;
-  //Gravity: Change the order of MPI processes assignment to match the assignment done by PFFT
-  //Original:
-  // for(i=0;i<nproc_x;i++)
-  //   for(j=0;j<nproc_y;j++)
-  //     for(k=0;k<nproc_z;k++)
+  // Gravity: Change the order of MPI processes assignment to match the
+  // assignment done by PFFT Original:
+  //  for(i=0;i<nproc_x;i++)
+  //    for(j=0;j<nproc_y;j++)
+  //      for(k=0;k<nproc_z;k++)
   //
 
-  for(k=0;k<nproc_z;k++)
-    for(j=0;j<nproc_y;j++)
-      for(i=0;i<nproc_x;i++)
-      {
+  for (k = 0; k < nproc_z; k++) {
+    for (j = 0; j < nproc_y; j++) {
+      for (i = 0; i < nproc_x; i++) {
         ix[n] = i;
         iy[n] = j;
         iz[n] = k;
 
         tiling[i][j][k] = n;
 
-        if(n==procID)
-        {
-          dest[0] = i-1;
-          if(dest[0]<0)
+        if (n == procID) {
+          dest[0] = i - 1;
+          if (dest[0] < 0) {
             dest[0] += nproc_x;
-          dest[1] = i+1;
-          if(dest[1]>=nproc_x)
+          }
+          dest[1] = i + 1;
+          if (dest[1] >= nproc_x) {
             dest[1] -= nproc_x;
+          }
 
-          dest[2] = j-1;
-          if(dest[2]<0)
+          dest[2] = j - 1;
+          if (dest[2] < 0) {
             dest[2] += nproc_y;
-          dest[3] = j+1;
-          if(dest[3]>=nproc_y)
+          }
+          dest[3] = j + 1;
+          if (dest[3] >= nproc_y) {
             dest[3] -= nproc_y;
+          }
 
-          dest[4] = k-1;
-          if(dest[4]<0)
+          dest[4] = k - 1;
+          if (dest[4] < 0) {
             dest[4] += nproc_z;
-          dest[5] = k+1;
-          if(dest[5]>=nproc_z)
+          }
+          dest[5] = k + 1;
+          if (dest[5] >= nproc_z) {
             dest[5] -= nproc_z;
+          }
         }
         n++;
       }
+    }
+  }
 
   /* set local x, y, z subdomain sizes */
-  n = nx_global%nproc_x;
-  if(!n)
-  {
-    //nx_global splits evenly along x procs*/
-    nx_local = nx_global/nproc_x;
-    nx_local_start = ix[procID]*nx_local;
-  }else{
-    nx_local = nx_global/nproc_x;
-    if(ix[procID]<n)
-    {
+  n = nx_global % nproc_x;
+  if (!n) {
+    // nx_global splits evenly along x procs*/
+    nx_local       = nx_global / nproc_x;
+    nx_local_start = ix[procID] * nx_local;
+  } else {
+    nx_local = nx_global / nproc_x;
+    if (ix[procID] < n) {
       nx_local++;
-      nx_local_start = ix[procID]*nx_local;
-    }else{
-      //check nx_local_start offsets -- should n be (n-1) below?
-      nx_local_start = n*(nx_local+1) + (ix[procID]-n)*nx_local;
+      nx_local_start = ix[procID] * nx_local;
+    } else {
+      // check nx_local_start offsets -- should n be (n-1) below?
+      nx_local_start = n * (nx_local + 1) + (ix[procID] - n) * nx_local;
     }
   }
-  n = ny_global%nproc_y;
-  if(!n)
-  {
-    //ny_global splits evenly along y procs*/
-    ny_local = ny_global/nproc_y;
-    ny_local_start = iy[procID]*ny_local;
-  }else{
-    ny_local = ny_global/nproc_y;
-    if(iy[procID]<n)
-    {
+  n = ny_global % nproc_y;
+  if (!n) {
+    // ny_global splits evenly along y procs*/
+    ny_local       = ny_global / nproc_y;
+    ny_local_start = iy[procID] * ny_local;
+  } else {
+    ny_local = ny_global / nproc_y;
+    if (iy[procID] < n) {
       ny_local++;
-      ny_local_start = iy[procID]*ny_local;
-    }else{
-      ny_local_start = n*(ny_local+1) + (iy[procID]-n)*ny_local;
+      ny_local_start = iy[procID] * ny_local;
+    } else {
+      ny_local_start = n * (ny_local + 1) + (iy[procID] - n) * ny_local;
     }
   }
-  n = nz_global%nproc_z;
-  if(!n)
-  {
-    //nz_global splits evenly along z procs*/
-    nz_local = nz_global/nproc_z;
-    nz_local_start = iz[procID]*nz_local;
-  }else{
-    nz_local = nz_global/nproc_z;
-    if(iz[procID]<n)
-    {
+  n = nz_global % nproc_z;
+  if (!n) {
+    // nz_global splits evenly along z procs*/
+    nz_local       = nz_global / nproc_z;
+    nz_local_start = iz[procID] * nz_local;
+  } else {
+    nz_local = nz_global / nproc_z;
+    if (iz[procID] < n) {
       nz_local++;
-      nz_local_start = iz[procID]*nz_local;
-    }else{
-      nz_local_start = n*(nz_local+1) + (iz[procID]-n)*nz_local;
+      nz_local_start = iz[procID] * nz_local;
+    } else {
+      nz_local_start = n * (nz_local + 1) + (iz[procID] - n) * nz_local;
     }
   }
 
-
-  //find MPI sources
-  for(i=0;i<6;i++)
+  // find MPI sources
+  for (i = 0; i < 6; i++) {
     source[i] = dest[i];
+  }
 
-  //find MPI destinations
+  // find MPI destinations
   dest[0] = tiling[dest[0]][iy[procID]][iz[procID]];
   dest[1] = tiling[dest[1]][iy[procID]][iz[procID]];
   dest[2] = tiling[ix[procID]][dest[2]][iz[procID]];
@@ -434,11 +435,10 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin
   source[4] = tiling[ix[procID]][iy[procID]][source[4]];
   source[5] = tiling[ix[procID]][iy[procID]][source[5]];
 
-  chprintf("nproc_x %d nproc_y %d nproc_z %d\n",nproc_x,nproc_y,nproc_z);
-
-  //free the tiling
-  deallocate_three_dimensional_int_array(tiling,nproc_x,nproc_y,nproc_z);
+  chprintf("nproc_x %d nproc_y %d nproc_z %d\n", nproc_x, nproc_y, nproc_z);
 
+  // free the tiling
+  deallocate_three_dimensional_int_array(tiling, nproc_x, nproc_y, nproc_z);
 
   /*adjust boundary condition flags*/
 
@@ -453,23 +453,25 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin
   /*do x bcnds first*/
   /*exterior faces have to be treated separately*/
   /*as long as there is more than one cell in the x direction*/
-  if (nproc_x!=1) {
-    if((ix[procID]==0)||(ix[procID]==nproc_x-1))
-    {
-      if(ix[procID]==0)
-      {
+  if (nproc_x != 1) {
+    if ((ix[procID] == 0) || (ix[procID] == nproc_x - 1)) {
+      if (ix[procID] == 0) {
         P->xu_bcnd = 5;
-        //if the global bcnd is periodic, use MPI bcnds at ends
-        if(P->xl_bcnd==1) P->xl_bcnd = 5;
-      }else{
+        // if the global bcnd is periodic, use MPI bcnds at ends
+        if (P->xl_bcnd == 1) {
+          P->xl_bcnd = 5;
+        }
+      } else {
         P->xl_bcnd = 5;
-        //if the global bcnd is periodic, use MPI bcnds at ends
-        if(P->xu_bcnd==1) P->xu_bcnd = 5;
+        // if the global bcnd is periodic, use MPI bcnds at ends
+        if (P->xu_bcnd == 1) {
+          P->xu_bcnd = 5;
+        }
       }
-    }else{
-      //this is completely an interior cell
-      //along the x direction, so
-      //set both x bcnds to MPI bcnds
+    } else {
+      // this is completely an interior cell
+      // along the x direction, so
+      // set both x bcnds to MPI bcnds
       P->xl_bcnd = 5;
       P->xu_bcnd = 5;
     }
@@ -478,23 +480,25 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin
   /*do y bcnds next*/
   /*exterior faces have to be treated separately*/
   /*as long as there is more than one cell in the x direction*/
-  if (nproc_y!=1) {
-    if((iy[procID]==0)||(iy[procID]==nproc_y-1))
-    {
-      if(iy[procID]==0)
-      {
+  if (nproc_y != 1) {
+    if ((iy[procID] == 0) || (iy[procID] == nproc_y - 1)) {
+      if (iy[procID] == 0) {
         P->yu_bcnd = 5;
-        //if the global bcnd is periodic, use MPI bcnds at ends
-        if(P->yl_bcnd==1) P->yl_bcnd = 5;
-      }else{
+        // if the global bcnd is periodic, use MPI bcnds at ends
+        if (P->yl_bcnd == 1) {
+          P->yl_bcnd = 5;
+        }
+      } else {
         P->yl_bcnd = 5;
-        //if the global bcnd is periodic, use MPI bcnds at ends
-        if(P->yu_bcnd==1) P->yu_bcnd = 5;
+        // if the global bcnd is periodic, use MPI bcnds at ends
+        if (P->yu_bcnd == 1) {
+          P->yu_bcnd = 5;
+        }
       }
-    }else{
-      //this is completely an interior cell
-      //along the y direction, so
-      //set both y bcnds to MPI bcnds
+    } else {
+      // this is completely an interior cell
+      // along the y direction, so
+      // set both y bcnds to MPI bcnds
       P->yl_bcnd = 5;
       P->yu_bcnd = 5;
     }
@@ -503,55 +507,54 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin
   /*do z bcnds next*/
   /*exterior faces have to be treated separately*/
   /*as long as there is more than one cell in the x direction*/
-  if(nproc_z!=1) {
-    if((iz[procID]==0)||(iz[procID]==nproc_z-1))
-    {
-      if(iz[procID]==0)
-      {
+  if (nproc_z != 1) {
+    if ((iz[procID] == 0) || (iz[procID] == nproc_z - 1)) {
+      if (iz[procID] == 0) {
         P->zu_bcnd = 5;
-        //if the global bcnd is periodic, use MPI bcnds at ends
-        if(P->zl_bcnd==1) P->zl_bcnd = 5;
-      }else{
+        // if the global bcnd is periodic, use MPI bcnds at ends
+        if (P->zl_bcnd == 1) {
+          P->zl_bcnd = 5;
+        }
+      } else {
         P->zl_bcnd = 5;
-        //if the global bcnd is periodic, use MPI bcnds at ends
-        if(P->zu_bcnd==1) P->zu_bcnd = 5;
+        // if the global bcnd is periodic, use MPI bcnds at ends
+        if (P->zu_bcnd == 1) {
+          P->zu_bcnd = 5;
+        }
       }
-    }else{
-      //this is completely an interior cell
-      //along the z direction, so
-      //set both z bcnds to MPI bcnds
+    } else {
+      // this is completely an interior cell
+      // along the z direction, so
+      // set both z bcnds to MPI bcnds
       P->zl_bcnd = 5;
       P->zu_bcnd = 5;
     }
   }
 
-
-  //free indices
+  // free indices
   free(ix);
   free(iy);
   free(iz);
-
 }
 
 void Allocate_MPI_DeviceBuffers(struct Header *H)
 {
-  int xbsize, ybsize, zbsize;
-  if (H->ny==1 && H->nz==1) {
-    xbsize = H->n_fields*H->n_ghost;
-    ybsize = 1;
-    zbsize = 1;    
+  int xbsize = 1, ybsize = 1, zbsize = 1;
+  if (H->ny == 1 && H->nz == 1) {
+    xbsize = H->n_fields * H->n_ghost;
   }
   // 2D
-  if (H->ny>1 && H->nz==1) {
-    xbsize = H->n_fields*H->n_ghost*(H->ny-2*H->n_ghost);
-    ybsize = H->n_fields*H->n_ghost*(H->nx);
-    zbsize = 1;
+  else if (H->ny > 1 && H->nz == 1) {
+    xbsize = H->n_fields * H->n_ghost * (H->ny - 2 * H->n_ghost);
+    ybsize = H->n_fields * H->n_ghost * (H->nx);
   }
   // 3D
-  if (H->ny>1 && H->nz>1) {
-    xbsize = H->n_fields*H->n_ghost*(H->ny-2*H->n_ghost)*(H->nz-2*H->n_ghost);
-    ybsize = H->n_fields*H->n_ghost*(H->nx)*(H->nz-2*H->n_ghost);
-    zbsize = H->n_fields*H->n_ghost*(H->nx)*(H->ny);
+  else if (H->ny > 1 && H->nz > 1) {
+    xbsize = H->n_fields * H->n_ghost * (H->ny - 2 * H->n_ghost) * (H->nz - 2 * H->n_ghost);
+    ybsize = H->n_fields * H->n_ghost * (H->nx) * (H->nz - 2 * H->n_ghost);
+    zbsize = H->n_fields * H->n_ghost * (H->nx) * (H->ny);
+  } else {
+    throw std::runtime_error("MPI buffer size failed to set.");
   }
 
   x_buffer_length = xbsize;
@@ -560,22 +563,22 @@ void Allocate_MPI_DeviceBuffers(struct Header *H)
 
   #ifdef PARTICLES
   // Set Initial sizes for particles buffers
-  int n_max = std::max( H->nx, H->ny );
-  n_max = std::max( H->nz, n_max );
-  int factor = 2;
-  N_PARTICLES_TRANSFER = n_max * n_max * factor ;
+  int n_max            = std::max(H->nx, H->ny);
+  n_max                = std::max(H->nz, n_max);
+  int factor           = 2;
+  N_PARTICLES_TRANSFER = n_max * n_max * factor;
 
   // Set the number of values that will be transferred for each particle
-  N_DATA_PER_PARTICLE_TRANSFER = 6; // 3 positions and 3 velocities
-  #ifndef SINGLE_PARTICLE_MASS
-  N_DATA_PER_PARTICLE_TRANSFER += 1; //one more for the particle mass
-  #endif
-  #ifdef PARTICLE_IDS
-  N_DATA_PER_PARTICLE_TRANSFER += 1; //one more for the particle ID
-  #endif
-  #ifdef PARTICLE_AGE
-  N_DATA_PER_PARTICLE_TRANSFER += 1; //one more for the particle age
-  #endif
+  N_DATA_PER_PARTICLE_TRANSFER = 6;  // 3 positions and 3 velocities
+    #ifndef SINGLE_PARTICLE_MASS
+  N_DATA_PER_PARTICLE_TRANSFER += 1;  // one more for the particle mass
+    #endif
+    #ifdef PARTICLE_IDS
+  N_DATA_PER_PARTICLE_TRANSFER += 1;  // one more for the particle ID
+    #endif
+    #ifdef PARTICLE_AGE
+  N_DATA_PER_PARTICLE_TRANSFER += 1;  // one more for the particle age
+    #endif
 
   buffer_length_particles_x0_send = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER;
   buffer_length_particles_x0_recv = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER;
@@ -589,84 +592,90 @@ void Allocate_MPI_DeviceBuffers(struct Header *H)
   buffer_length_particles_z0_recv = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER;
   buffer_length_particles_z1_send = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER;
   buffer_length_particles_z1_recv = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER;
-  #endif //PARTICLES
+  #endif  // PARTICLES
 
   chprintf("Allocating MPI communication buffers on GPU ");
   chprintf("(nx = %ld, ny = %ld, nz = %ld).\n", xbsize, ybsize, zbsize);
 
-  CudaSafeCall ( cudaMalloc (&d_send_buffer_x0, xbsize*sizeof(Real)) );
-  CudaSafeCall ( cudaMalloc (&d_send_buffer_x1, xbsize*sizeof(Real)) );
-  CudaSafeCall ( cudaMalloc (&d_recv_buffer_x0, xbsize*sizeof(Real)) );
-  CudaSafeCall ( cudaMalloc (&d_recv_buffer_x1, xbsize*sizeof(Real)) );
-  CudaSafeCall ( cudaMalloc (&d_send_buffer_y0, ybsize*sizeof(Real)) );
-  CudaSafeCall ( cudaMalloc (&d_send_buffer_y1, ybsize*sizeof(Real)) );
-  CudaSafeCall ( cudaMalloc (&d_recv_buffer_y0, ybsize*sizeof(Real)) );
-  CudaSafeCall ( cudaMalloc (&d_recv_buffer_y1, ybsize*sizeof(Real)) );
-  CudaSafeCall ( cudaMalloc (&d_send_buffer_z0, zbsize*sizeof(Real)) );
-  CudaSafeCall ( cudaMalloc (&d_send_buffer_z1, zbsize*sizeof(Real)) );
-  CudaSafeCall ( cudaMalloc (&d_recv_buffer_z0, zbsize*sizeof(Real)) );
-  CudaSafeCall ( cudaMalloc (&d_recv_buffer_z1, zbsize*sizeof(Real)) );
+  GPU_Error_Check(cudaMalloc(&d_send_buffer_x0, xbsize * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc(&d_send_buffer_x1, xbsize * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc(&d_recv_buffer_x0, xbsize * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc(&d_recv_buffer_x1, xbsize * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc(&d_send_buffer_y0, ybsize * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc(&d_send_buffer_y1, ybsize * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc(&d_recv_buffer_y0, ybsize * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc(&d_recv_buffer_y1, ybsize * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc(&d_send_buffer_z0, zbsize * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc(&d_send_buffer_z1, zbsize * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc(&d_recv_buffer_z0, zbsize * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc(&d_recv_buffer_z1, zbsize * sizeof(Real)));
 
   #if !defined(MPI_GPU)
-  h_send_buffer_x0 = (Real *) malloc ( xbsize*sizeof(Real) );
-  h_send_buffer_x1 = (Real *) malloc ( xbsize*sizeof(Real) );
-  h_recv_buffer_x0 = (Real *) malloc ( xbsize*sizeof(Real) );
-  h_recv_buffer_x1 = (Real *) malloc ( xbsize*sizeof(Real) );
-  h_send_buffer_y0 = (Real *) malloc ( ybsize*sizeof(Real) );
-  h_send_buffer_y1 = (Real *) malloc ( ybsize*sizeof(Real) );
-  h_recv_buffer_y0 = (Real *) malloc ( ybsize*sizeof(Real) );
-  h_recv_buffer_y1 = (Real *) malloc ( ybsize*sizeof(Real) );
-  h_send_buffer_z0 = (Real *) malloc ( zbsize*sizeof(Real) );
-  h_send_buffer_z1 = (Real *) malloc ( zbsize*sizeof(Real) );
-  h_recv_buffer_z0 = (Real *) malloc ( zbsize*sizeof(Real) );
-  h_recv_buffer_z1 = (Real *) malloc ( zbsize*sizeof(Real) );
+  h_send_buffer_x0 = (Real *)malloc(xbsize * sizeof(Real));
+  h_send_buffer_x1 = (Real *)malloc(xbsize * sizeof(Real));
+  h_recv_buffer_x0 = (Real *)malloc(xbsize * sizeof(Real));
+  h_recv_buffer_x1 = (Real *)malloc(xbsize * sizeof(Real));
+  h_send_buffer_y0 = (Real *)malloc(ybsize * sizeof(Real));
+  h_send_buffer_y1 = (Real *)malloc(ybsize * sizeof(Real));
+  h_recv_buffer_y0 = (Real *)malloc(ybsize * sizeof(Real));
+  h_recv_buffer_y1 = (Real *)malloc(ybsize * sizeof(Real));
+  h_send_buffer_z0 = (Real *)malloc(zbsize * sizeof(Real));
+  h_send_buffer_z1 = (Real *)malloc(zbsize * sizeof(Real));
+  h_recv_buffer_z0 = (Real *)malloc(zbsize * sizeof(Real));
+  h_recv_buffer_z1 = (Real *)malloc(zbsize * sizeof(Real));
   #endif
 
-  // NOTE: When changing this ifdef check for compatibility with 
+  // NOTE: When changing this ifdef check for compatibility with
   // Grid3D::Load_NTtransfer_and_Request_Receive_Particles_Transfer
   // in particles/particles_boundaries.cpp
 
-  // Whether or not MPI_GPU is on, the device has transfer buffers for PARTICLES_GPU
+  // Whether or not MPI_GPU is on, the device has transfer buffers for
+  // PARTICLES_GPU
 
   #if defined(PARTICLES) && defined(PARTICLES_GPU)
-  chprintf("Allocating MPI communication buffers on GPU for particle transfers ( N_Particles: %d ).\n", N_PARTICLES_TRANSFER );
-  CudaSafeCall ( cudaMalloc (&d_send_buffer_x0_particles, buffer_length_particles_x0_send*sizeof(Real)) );
-  CudaSafeCall ( cudaMalloc (&d_send_buffer_x1_particles, buffer_length_particles_x1_send*sizeof(Real)) );
-  CudaSafeCall ( cudaMalloc (&d_send_buffer_y0_particles, buffer_length_particles_y0_send*sizeof(Real)) );
-  CudaSafeCall ( cudaMalloc (&d_send_buffer_y1_particles, buffer_length_particles_y1_send*sizeof(Real)) );
-  CudaSafeCall ( cudaMalloc (&d_send_buffer_z0_particles, buffer_length_particles_z0_send*sizeof(Real)) );
-  CudaSafeCall ( cudaMalloc (&d_send_buffer_z1_particles, buffer_length_particles_z1_send*sizeof(Real)) );
-  CudaSafeCall ( cudaMalloc (&d_recv_buffer_x0_particles, buffer_length_particles_x0_recv*sizeof(Real)) );
-  CudaSafeCall ( cudaMalloc (&d_recv_buffer_x1_particles, buffer_length_particles_x1_recv*sizeof(Real)) );
-  CudaSafeCall ( cudaMalloc (&d_recv_buffer_y0_particles, buffer_length_particles_y0_recv*sizeof(Real)) );
-  CudaSafeCall ( cudaMalloc (&d_recv_buffer_y1_particles, buffer_length_particles_y1_recv*sizeof(Real)) );
-  CudaSafeCall ( cudaMalloc (&d_recv_buffer_z0_particles, buffer_length_particles_z0_recv*sizeof(Real)) );
-  CudaSafeCall ( cudaMalloc (&d_recv_buffer_z1_particles, buffer_length_particles_z1_recv*sizeof(Real)) );
-  #endif // PARTICLES && PARTICLES_GPU
+  chprintf(
+      "Allocating MPI communication buffers on GPU for particle transfers ( "
+      "N_Particles: %d ).\n",
+      N_PARTICLES_TRANSFER);
+  GPU_Error_Check(cudaMalloc(&d_send_buffer_x0_particles, buffer_length_particles_x0_send * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc(&d_send_buffer_x1_particles, buffer_length_particles_x1_send * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc(&d_send_buffer_y0_particles, buffer_length_particles_y0_send * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc(&d_send_buffer_y1_particles, buffer_length_particles_y1_send * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc(&d_send_buffer_z0_particles, buffer_length_particles_z0_send * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc(&d_send_buffer_z1_particles, buffer_length_particles_z1_send * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc(&d_recv_buffer_x0_particles, buffer_length_particles_x0_recv * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc(&d_recv_buffer_x1_particles, buffer_length_particles_x1_recv * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc(&d_recv_buffer_y0_particles, buffer_length_particles_y0_recv * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc(&d_recv_buffer_y1_particles, buffer_length_particles_y1_recv * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc(&d_recv_buffer_z0_particles, buffer_length_particles_z0_recv * sizeof(Real)));
+  GPU_Error_Check(cudaMalloc(&d_recv_buffer_z1_particles, buffer_length_particles_z1_recv * sizeof(Real)));
+  #endif  // PARTICLES && PARTICLES_GPU
 
   // CPU relies on host buffers, GPU without MPI_GPU relies on host buffers
 
   #ifdef PARTICLES
-  #if (defined(PARTICLES_GPU) && !defined(MPI_GPU)) || defined(PARTICLES_CPU)
-  chprintf("Allocating MPI communication buffers on Host for particle transfers ( N_Particles: %d ).\n", N_PARTICLES_TRANSFER );
-  h_send_buffer_x0_particles = (Real *) malloc ( buffer_length_particles_x0_send*sizeof(Real) );
-  h_send_buffer_x1_particles = (Real *) malloc ( buffer_length_particles_x1_send*sizeof(Real) );
-  h_send_buffer_y0_particles = (Real *) malloc ( buffer_length_particles_y0_send*sizeof(Real) );
-  h_send_buffer_y1_particles = (Real *) malloc ( buffer_length_particles_y1_send*sizeof(Real) );
-  h_send_buffer_z0_particles = (Real *) malloc ( buffer_length_particles_z0_send*sizeof(Real) );
-  h_send_buffer_z1_particles = (Real *) malloc ( buffer_length_particles_z1_send*sizeof(Real) );
-  h_recv_buffer_x0_particles = (Real *) malloc ( buffer_length_particles_x0_recv*sizeof(Real) );
-  h_recv_buffer_x1_particles = (Real *) malloc ( buffer_length_particles_x1_recv*sizeof(Real) );
-  h_recv_buffer_y0_particles = (Real *) malloc ( buffer_length_particles_y0_recv*sizeof(Real) );
-  h_recv_buffer_y1_particles = (Real *) malloc ( buffer_length_particles_y1_recv*sizeof(Real) );
-  h_recv_buffer_z0_particles = (Real *) malloc ( buffer_length_particles_z0_recv*sizeof(Real) );
-  h_recv_buffer_z1_particles = (Real *) malloc ( buffer_length_particles_z1_recv*sizeof(Real) );
-  #endif // (defined(PARTICLES_GPU) && !defined(MPI_GPU)) || defined(PARTICLES_CPU)
-  #endif //PARTICLES
-
+    #if (defined(PARTICLES_GPU) && !defined(MPI_GPU)) || defined(PARTICLES_CPU)
+  chprintf(
+      "Allocating MPI communication buffers on Host for particle transfers ( "
+      "N_Particles: %d ).\n",
+      N_PARTICLES_TRANSFER);
+  h_send_buffer_x0_particles = (Real *)malloc(buffer_length_particles_x0_send * sizeof(Real));
+  h_send_buffer_x1_particles = (Real *)malloc(buffer_length_particles_x1_send * sizeof(Real));
+  h_send_buffer_y0_particles = (Real *)malloc(buffer_length_particles_y0_send * sizeof(Real));
+  h_send_buffer_y1_particles = (Real *)malloc(buffer_length_particles_y1_send * sizeof(Real));
+  h_send_buffer_z0_particles = (Real *)malloc(buffer_length_particles_z0_send * sizeof(Real));
+  h_send_buffer_z1_particles = (Real *)malloc(buffer_length_particles_z1_send * sizeof(Real));
+  h_recv_buffer_x0_particles = (Real *)malloc(buffer_length_particles_x0_recv * sizeof(Real));
+  h_recv_buffer_x1_particles = (Real *)malloc(buffer_length_particles_x1_recv * sizeof(Real));
+  h_recv_buffer_y0_particles = (Real *)malloc(buffer_length_particles_y0_recv * sizeof(Real));
+  h_recv_buffer_y1_particles = (Real *)malloc(buffer_length_particles_y1_recv * sizeof(Real));
+  h_recv_buffer_z0_particles = (Real *)malloc(buffer_length_particles_z0_recv * sizeof(Real));
+  h_recv_buffer_z1_particles = (Real *)malloc(buffer_length_particles_z1_recv * sizeof(Real));
+    #endif  // (defined(PARTICLES_GPU) && !defined(MPI_GPU)) ||
+            // defined(PARTICLES_CPU)
+  #endif    // PARTICLES
 }
 
-
 /* MPI reduction wrapper for max(Real)*/
 Real ReduceRealMax(Real x)
 {
@@ -675,11 +684,10 @@ Real ReduceRealMax(Real x)
   Real y;
 
   MPI_Allreduce(&in, &out, 1, MPI_CHREAL, MPI_MAX, world);
-  y = (Real) out;
+  y = (Real)out;
   return y;
 }
 
-
 /* MPI reduction wrapper for min(Real)*/
 Real ReduceRealMin(Real x)
 {
@@ -688,11 +696,10 @@ Real ReduceRealMin(Real x)
   Real y;
 
   MPI_Allreduce(&in, &out, 1, MPI_CHREAL, MPI_MIN, world);
-  y = (Real) out;
+  y = (Real)out;
   return y;
 }
 
-
 /* MPI reduction wrapper for avg(Real)*/
 Real ReduceRealAvg(Real x)
 {
@@ -701,11 +708,34 @@ Real ReduceRealAvg(Real x)
   Real y;
 
   MPI_Allreduce(&in, &out, 1, MPI_CHREAL, MPI_SUM, world);
-  y = (Real) out / nproc;
+  y = (Real)out / nproc;
   return y;
 }
 
-#ifdef PARTICLES
+size_t Reduce_size_t_Max(size_t in)
+{
+  // Get the right MPI type
+  #if SIZE_MAX == UCHAR_MAX
+    #define my_MPI_SIZE_T MPI_UNSIGNED_CHAR
+  #elif SIZE_MAX == USHRT_MAX
+    #define my_MPI_SIZE_T MPI_UNSIGNED_SHORT
+  #elif SIZE_MAX == UINT_MAX
+    #define my_MPI_SIZE_T MPI_UNSIGNED
+  #elif SIZE_MAX == ULONG_MAX
+    #define my_MPI_SIZE_T MPI_UNSIGNED_LONG
+  #elif SIZE_MAX == ULLONG_MAX
+    #define my_MPI_SIZE_T MPI_UNSIGNED_LONG_LONG
+  #else
+    #error "Error: Type of size_t not supported by Reduce_size_t_Max"
+  #endif
+
+  // Perform the reduction
+  size_t out;
+  MPI_Allreduce(&in, &out, 1, my_MPI_SIZE_T, MPI_MAX, world);
+  return out;
+}
+
+  #ifdef PARTICLES
 /* MPI reduction wrapper for sum(part_int)*/
 Real ReducePartIntSum(part_int_t x)
 {
@@ -713,29 +743,31 @@ Real ReducePartIntSum(part_int_t x)
   part_int_t out;
   part_int_t y;
 
-  #ifdef PARTICLES_LONG_INTS
+    #ifdef PARTICLES_LONG_INTS
   MPI_Allreduce(&in, &out, 1, MPI_LONG, MPI_SUM, world);
-  #else
+    #else
   MPI_Allreduce(&in, &out, 1, MPI_INT, MPI_SUM, world);
-  #endif
-  y = (part_int_t) out ;
+    #endif
+  y = (part_int_t)out;
   return y;
 }
 
-
 // Count the particles in the MPI ranks lower than this rank (procID) to get a
 // global offset for the local IDs.
-part_int_t Get_Particles_IDs_Global_MPI_Offset( part_int_t n_local ){
+part_int_t Get_Particles_IDs_Global_MPI_Offset(part_int_t n_local)
+{
   part_int_t global_offset;
   part_int_t *n_local_all, *n_local_send;
-  n_local_send = (part_int_t *) malloc( 1*sizeof(part_int_t) );
-  n_local_all  = (part_int_t *) malloc( nproc*sizeof(part_int_t) );
+  n_local_send    = (part_int_t *)malloc(1 * sizeof(part_int_t));
+  n_local_all     = (part_int_t *)malloc(nproc * sizeof(part_int_t));
   n_local_send[0] = n_local;
 
-  MPI_Allgather( n_local_send, 1, MPI_PART_INT, n_local_all, 1, MPI_PART_INT, world );
+  MPI_Allgather(n_local_send, 1, MPI_PART_INT, n_local_all, 1, MPI_PART_INT, world);
   global_offset = 0;
-  for (int other_rank=0; other_rank<nproc; other_rank++ ){
-    if ( other_rank < procID ) global_offset += n_local_all[other_rank];
+  for (int other_rank = 0; other_rank < nproc; other_rank++) {
+    if (other_rank < procID) {
+      global_offset += n_local_all[other_rank];
+    }
   }
   // printf("global_offset = %ld \n", global_offset );
   free(n_local_send);
@@ -743,7 +775,7 @@ part_int_t Get_Particles_IDs_Global_MPI_Offset( part_int_t n_local ){
   return global_offset;
 }
 
-#endif
+  #endif
 
 /* Print information about the domain properties */
 void Print_Domain_Properties(struct Header H)
@@ -751,54 +783,44 @@ void Print_Domain_Properties(struct Header H)
   int i;
   fflush(stdout);
   MPI_Barrier(world);
-  for(i=0;i<nproc;i++)
-  {
-    if(i==procID)
-    {
-      printf("procID %d nxl %ld nxls %ld\n",procID,nx_local,nx_local_start);
-      printf("xb %e yb %e zb %e xbl %e ybl %e zbl %e\n",H.xbound,H.ybound,H.zbound,H.xblocal,H.yblocal,H.zblocal);
-      printf("dx %e\n",H.dx);
-      printf("dy %e\n",H.dy);
-      printf("dz %e\n",H.dz);
+  for (i = 0; i < nproc; i++) {
+    if (i == procID) {
+      printf("procID %d nxl %ld nxls %ld\n", procID, nx_local, nx_local_start);
+      printf("xb %e yb %e zb %e xbl %e ybl %e zbl %e\n", H.xbound, H.ybound, H.zbound, H.xblocal, H.yblocal, H.zblocal);
+      printf("dx %e\n", H.dx);
+      printf("dy %e\n", H.dy);
+      printf("dz %e\n", H.dz);
       printf("*********\n");
     }
     fflush(stdout);
     MPI_Barrier(world);
   }
-
 }
 
-
-
-
-
-
-
-
-
-
-
-#ifdef PARTICLES
-// Funtion that checks if the buffer size For the particles transfer is large enough,
-// and grows the buffer if needed.
-void Check_and_Grow_Particles_Buffer( Real **part_buffer, int *current_size_ptr, int new_size ){
-
+  #ifdef PARTICLES
+// Funtion that checks if the buffer size For the particles transfer is large
+// enough, and grows the buffer if needed.
+void Check_and_Grow_Particles_Buffer(Real **part_buffer, int *current_size_ptr, int new_size)
+{
   int current_size = *current_size_ptr;
-  if ( new_size <= current_size ) return;
+  if (new_size <= current_size) {
+    return;
+  }
 
-  new_size = (int) 2 * new_size;
-  std::cout << " #######  Growing Particles Transfer Buffer, size: " << current_size << "  new_size: " << new_size << std::endl;
+  new_size = (int)2 * new_size;
+  std::cout << " #######  Growing Particles Transfer Buffer, size: " << current_size << "  new_size: " << new_size
+            << std::endl;
 
   Real *new_buffer;
-  new_buffer = (Real *) realloc( *part_buffer, new_size*sizeof(Real) );
-  if ( new_buffer == NULL ){
+  new_buffer = (Real *)realloc(*part_buffer, new_size * sizeof(Real));
+  if (new_buffer == NULL) {
     std::cout << " Error When Allocating New Particles Transfer Buffer" << std::endl;
     chexit(-1);
   }
-  *part_buffer = new_buffer;
+  *part_buffer      = new_buffer;
   *current_size_ptr = new_size;
 }
-#endif //PARTICLES
+  #endif  // PARTICLES
 
 /* find the greatest prime factor of an integer */
 int greatest_prime_factor(int n)
@@ -806,37 +828,55 @@ int greatest_prime_factor(int n)
   int ns = n;
   int np = 2;
 
-  if(n==1||n==2)
+  if (n == 1 || n == 2) {
     return n;
+  }
 
-  while(1)
-  {
-    while(!(ns%np))
-    {
-      ns = ns/np;
+  while (true) {
+    while (!(ns % np)) {
+      ns = ns / np;
     }
 
-    if(ns==1)
+    if (ns == 1) {
       break;
+    }
 
     np++;
   }
   return np;
 }
 
+/*
+ tile MPI processes in a block arrangement for the 3D case
+ */
+void TileBlockDecomposition3D(int number, int &np_x, int &np_y, int &np_z)
+{
+  int dims[3]  = {1, 1, 1};
+  size_t index = 0;
+  while (number > 1) {
+    int gpf = greatest_prime_factor(number);
+    number /= gpf;
+    dims[index % 3] *= gpf;
+    index += 1;
+  }
+  np_x = dims[0];
+  np_y = dims[1];
+  np_z = dims[2];
+}
+
 /*tile MPI processes in a block arrangement*/
 void TileBlockDecomposition(void)
 {
   int n_gpf;
 
-  //initialize np_x, np_y, np_z
+  // initialize np_x, np_y, np_z
   int np_x = 1;
   int np_y = 1;
   int np_z = 1;
-  //printf("nproc %d n_gpf %d\n",nproc,n_gpf);
+  // printf("nproc %d n_gpf %d\n",nproc,n_gpf);
 
   /* 1-D case is trivial */
-  if (nz_global==1 && ny_global==1) {
+  if (nz_global == 1 && ny_global == 1) {
     nproc_x = nproc;
     nproc_y = 1;
     nproc_z = 1;
@@ -849,9 +889,9 @@ void TileBlockDecomposition(void)
   /* 2-D case
      we can just assign domain*/
 
-  if (nz_global==1) {
+  if (nz_global == 1) {
     np_x = n_gpf;
-    np_y = nproc/np_x;
+    np_y = nproc / np_x;
     // ensure nproc_x > nproc_y
     if (np_x < np_y) {
       nproc_x = np_y;
@@ -864,73 +904,32 @@ void TileBlockDecomposition(void)
     return;
   }
 
-  /*base decomposition on whether n_gpf==2*/
-  if(n_gpf!=2) {
-    /*we are in 3-d, so split remainder evenly*/
-    np_x  = n_gpf;
-    n_gpf = greatest_prime_factor(nproc/n_gpf);
-    if(n_gpf!=2) {
-      /*the next greatest prime is odd, so just split*/
-      np_y = n_gpf;
-      np_z = nproc/(np_x*np_y);
-    } else {
-      /*increase ny, nz round-robin*/
-      while(np_x*np_y*np_z < nproc)
-        {
-	  np_y*=2;
-	  if(np_x*np_y*np_z==nproc)
-	    break;
-	  np_z*=2;
-        }
+  TileBlockDecomposition3D(nproc, np_x, np_y, np_z);
+  // reorder x, y, z
 
-    }
-  } else {
-    /*nproc is a power of 2*/
-    /*we are in 3-d, so split remainder evenly*/
-
-    /*increase nx, ny, nz round-robin*/
-    while(np_x*np_y*np_z < nproc)
-      {
-        np_x*=2;
-        if(np_x*np_y*np_z==nproc)
-          break;
-        np_y*=2;
-        if(np_x*np_y*np_z==nproc)
-          break;
-        np_z*=2;
-      }
+  int n_tmp;
+  if (np_z > np_y) {
+    n_tmp = np_y;
+    np_y  = np_z;
+    np_z  = n_tmp;
+  }
+  if (np_y > np_x) {
+    n_tmp = np_x;
+    np_x  = np_y;
+    np_y  = n_tmp;
+  }
+  if (np_z > np_y) {
+    n_tmp = np_y;
+    np_y  = np_z;
+    np_z  = n_tmp;
   }
-  
-
-  //reorder x, y, z
 
-  int n_tmp;
-  if(np_z>np_y)
-  {
-  	n_tmp = np_y;
-  	np_y  = np_z;
-  	np_z  = n_tmp;
-  }
-  if(np_y>np_x)
-  {
-  	n_tmp = np_x;
-  	np_x  = np_y;
-  	np_y  = n_tmp;
-  }
-  if(np_z>np_y)
-  {
-  	n_tmp = np_y;
-  	np_y  = np_z;
-  	np_z  = n_tmp;
-  }
-
-  //save result
+  // save result
   nproc_x = np_x;
   nproc_y = np_y;
   nproc_z = np_z;
 }
 
-
 /*! \fn int ***three_dimensional_int_array(int n, int l, int m)
  *  *  \brief Allocate a three dimensional (n x l x m) int array
  *   */
@@ -939,60 +938,88 @@ int ***three_dimensional_int_array(int n, int l, int m)
   int ***x;
 
   x = new int **[n];
-  for(int i=0;i<n;i++)
-  {
+  for (int i = 0; i < n; i++) {
     x[i] = new int *[l];
-    for(int j=0;j<l;j++)
-    {
-      x[i][j] = new int [m];
+    for (int j = 0; j < l; j++) {
+      x[i][j] = new int[m];
     }
   }
 
   return x;
 }
-/*! \fn void deallocate_three_int_dimensional_array(int ***x, int n, int l, int m)
+/*! \fn void deallocate_three_int_dimensional_array(int ***x, int n, int l, int
+ * m)
  *  *  \brief De-allocate a three dimensional (n x l x m) int array.
  *   */
 void deallocate_three_dimensional_int_array(int ***x, int n, int l, int m)
 {
-  for(int i=0;i<n;i++)
-  {
-    for(int j=0;j<l;j++)
+  for (int i = 0; i < n; i++) {
+    for (int j = 0; j < l; j++) {
       delete[] x[i][j];
+    }
     delete[] x[i];
   }
   delete x;
 }
 
+void copyHostToDeviceReceiveBuffer(int direction)
+{
+  int xbsize = x_buffer_length, ybsize = y_buffer_length, zbsize = z_buffer_length;
 
-void copyHostToDeviceReceiveBuffer ( int direction )
+  switch (direction) {
+    case (0):
+      cudaMemcpy(d_recv_buffer_x0, h_recv_buffer_x0, xbsize * sizeof(Real), cudaMemcpyHostToDevice);
+      break;
+    case (1):
+      cudaMemcpy(d_recv_buffer_x1, h_recv_buffer_x1, xbsize * sizeof(Real), cudaMemcpyHostToDevice);
+      break;
+    case (2):
+      cudaMemcpy(d_recv_buffer_y0, h_recv_buffer_y0, ybsize * sizeof(Real), cudaMemcpyHostToDevice);
+      break;
+    case (3):
+      cudaMemcpy(d_recv_buffer_y1, h_recv_buffer_y1, ybsize * sizeof(Real), cudaMemcpyHostToDevice);
+      break;
+    case (4):
+      cudaMemcpy(d_recv_buffer_z0, h_recv_buffer_z0, zbsize * sizeof(Real), cudaMemcpyHostToDevice);
+      break;
+    case (5):
+      cudaMemcpy(d_recv_buffer_z1, h_recv_buffer_z1, zbsize * sizeof(Real), cudaMemcpyHostToDevice);
+      break;
+  }
+}
+
+std::pair<int, int> MPI_Comm_node()
 {
+  // get the global process rank
+  int myid, nproc;
+  MPI_Comm_rank(MPI_COMM_WORLD, &myid);
+  MPI_Comm_size(MPI_COMM_WORLD, &nproc);
 
-  int xbsize = x_buffer_length,
-      ybsize = y_buffer_length,
-      zbsize = z_buffer_length;
-
-  switch ( direction ) {
-  case ( 0 ): cudaMemcpy(d_recv_buffer_x0, h_recv_buffer_x0,
-                         xbsize*sizeof(Real), cudaMemcpyHostToDevice);
-              break;
-  case ( 1 ): cudaMemcpy(d_recv_buffer_x1, h_recv_buffer_x1,
-                         xbsize*sizeof(Real), cudaMemcpyHostToDevice);
-              break;
-  case ( 2 ): cudaMemcpy(d_recv_buffer_y0, h_recv_buffer_y0,
-                         ybsize*sizeof(Real), cudaMemcpyHostToDevice);
-              break;
-  case ( 3 ): cudaMemcpy(d_recv_buffer_y1, h_recv_buffer_y1,
-                         ybsize*sizeof(Real), cudaMemcpyHostToDevice);
-              break;
-  case ( 4 ): cudaMemcpy(d_recv_buffer_z0, h_recv_buffer_z0,
-                         zbsize*sizeof(Real), cudaMemcpyHostToDevice);
-              break;
-  case ( 5 ): cudaMemcpy(d_recv_buffer_z1, h_recv_buffer_z1,
-                         zbsize*sizeof(Real), cudaMemcpyHostToDevice);
-              break;
+  // if there is the only one process, then just return the global rank and size
+  if (nproc == 1) {
+    return {myid, nproc};
   }
 
+  // get the hostname of the node
+  std::string pname;  // node hostname
+  pname.resize(MPI_MAX_PROCESSOR_NAME);
+  int pname_length;  // length of node hostname
+
+  MPI_Get_processor_name(pname.data(), &pname_length);
+
+  // hash the name of the node. MPI_Comm_split doesn't like negative numbers and accepts ints not unsigned ints so we
+  // need to take the absolute value
+  int const hash = std::abs(static_cast<int>(std::hash<std::string>{}(pname)));
+
+  // split the communicator
+  MPI_Comm node_comm;  // communicator for the procs on each node
+  MPI_Comm_split(MPI_COMM_WORLD, hash, myid, &node_comm);
+
+  // get size and rank
+  MPI_Comm_rank(node_comm, &myid);
+  MPI_Comm_size(node_comm, &nproc);
+
+  return {myid, nproc};
 }
 
 #endif /*MPI_CHOLLA*/
diff --git a/src/mpi/mpi_routines.h b/src/mpi/mpi_routines.h
index b94e8595c..913b5e36a 100644
--- a/src/mpi/mpi_routines.h
+++ b/src/mpi/mpi_routines.h
@@ -1,42 +1,45 @@
 #ifdef MPI_CHOLLA
-#ifndef  MPI_ROUTINES_H
-#define  MPI_ROUTINES_H
-#include <mpi.h>
-#include <stddef.h>
-#include "../grid/grid3D.h"
-#include "../global/global.h"
-
-#ifdef FFTW
-#include "fftw3.h"
-#include "fftw3-mpi.h"
-#endif /*FFTW*/
+  #ifndef MPI_ROUTINES_H
+    #define MPI_ROUTINES_H
+    #include <mpi.h>
+    #include <stddef.h>
+
+    #include <utility>
+
+    #include "../global/global.h"
+    #include "../grid/grid3D.h"
+
+    #ifdef FFTW
+      #include "fftw3-mpi.h"
+      #include "fftw3.h"
+    #endif /*FFTW*/
 
 /*Global MPI Variables*/
-extern int procID; /*process rank*/
-extern int nproc;  /*number of processes in global comm*/
-extern int root;   /*rank of root process*/
+// NOTE: some variable heavily used by mpi are declared in global.h so that they are defined even
+//       when compiled without mpi
+
 extern int procID_node; /*process rank on node*/
 extern int nproc_node;  /*number of MPI processes on node*/
 
-extern MPI_Comm world;	/*global communicator*/
-extern MPI_Comm node;	/*communicator for each node*/
+extern MPI_Comm world; /*global communicator*/
+extern MPI_Comm node;  /*communicator for each node*/
 
 extern MPI_Datatype MPI_CHREAL; /*data type describing float precision*/
 
-#ifdef PARTICLES
+    #ifdef PARTICLES
 extern MPI_Datatype MPI_PART_INT; /*data type describing interger for particles precision*/
-#endif
+    #endif
 
-//extern MPI_Request send_request[6];
-//extern MPI_Request recv_request[6];
+// extern MPI_Request send_request[6];
+// extern MPI_Request recv_request[6];
 extern MPI_Request *send_request;
 extern MPI_Request *recv_request;
 
-//MPI destinations and sources
+// MPI destinations and sources
 extern int dest[6];
 extern int source[6];
 
-//Communication buffers
+// Communication buffers
 
 // For BLOCK
 extern Real *d_send_buffer_x0;
@@ -65,8 +68,8 @@ extern Real *h_recv_buffer_y1;
 extern Real *h_recv_buffer_z0;
 extern Real *h_recv_buffer_z1;
 
-#ifdef PARTICLES
-//Buffers for particles transfers
+    #ifdef PARTICLES
+// Buffers for particles transfers
 extern Real *d_send_buffer_x0_particles;
 extern Real *d_send_buffer_x1_particles;
 extern Real *d_send_buffer_y0_particles;
@@ -113,8 +116,7 @@ extern MPI_Request *recv_request_n_particles;
 // Request for Particles Transfer
 extern MPI_Request *send_request_particles_transfer;
 extern MPI_Request *recv_request_particles_transfer;
-#endif//PARTICLES
-
+    #endif  // PARTICLES
 
 extern int send_buffer_length;
 extern int recv_buffer_length;
@@ -134,9 +136,9 @@ extern ptrdiff_t nx_local_start;
 extern ptrdiff_t ny_local_start;
 extern ptrdiff_t nz_local_start;
 
-#ifdef   FFTW
+    #ifdef FFTW
 extern ptrdiff_t n_local_complex;
-#endif /*FFTW*/
+    #endif /*FFTW*/
 
 /*number of MPI procs in each dimension*/
 extern int nproc_x;
@@ -148,9 +150,9 @@ extern int nproc_z;
 void InitializeChollaMPI(int *pargc, char **pargv[]);
 
 /* Perform domain decomposition */
-void DomainDecomposition(struct parameters *P, struct Header *H, int nx_global, int ny_global, int nz_global);
+void DomainDecomposition(struct Parameters *P, struct Header *H, int nx_global, int ny_global, int nz_global);
 
-void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_global, int ny_global, int nz_global);
+void DomainDecompositionBLOCK(struct Parameters *P, struct Header *H, int nx_global, int ny_global, int nz_global);
 
 /*tile MPI processes in a block decomposition*/
 void TileBlockDecomposition(void);
@@ -164,17 +166,26 @@ Real ReduceRealMin(Real x);
 /* MPI reduction wrapper for avg(Real)*/
 Real ReduceRealAvg(Real x);
 
-#ifdef PARTICLES
+/*!
+ * \brief MPI reduction wrapper to find the maximum of a size_t variable
+ *
+ * \param in The rank-local value to be reduced
+ * \return size_t The global reduced value
+ */
+size_t Reduce_size_t_Max(size_t in);
+
+    #ifdef PARTICLES
 /* MPI reduction wrapper for sum(part_int)*/
 Real ReducePartIntSum(part_int_t x);
 
-// Count the particles in the MPI ranks lower that this rank to get a global offset for the local IDs.
-part_int_t Get_Particles_IDs_Global_MPI_Offset( part_int_t n_local );
+// Count the particles in the MPI ranks lower that this rank to get a global
+// offset for the local IDs.
+part_int_t Get_Particles_IDs_Global_MPI_Offset(part_int_t n_local);
 
-// Function that checks if the buffer size For the particles transfer is large enough,
-// and grows the buffer if needed.
-void Check_and_Grow_Particles_Buffer( Real **part_buffer, int *current_size_ptr, int new_size );
-#endif
+// Function that checks if the buffer size For the particles transfer is large
+// enough, and grows the buffer if needed.
+void Check_and_Grow_Particles_Buffer(Real **part_buffer, int *current_size_ptr, int new_size);
+    #endif
 
 /* Print information about the domain properties */
 void Print_Domain_Properties(struct Header H);
@@ -185,19 +196,25 @@ void Allocate_MPI_DeviceBuffers(struct Header *H);
 /* find the greatest prime factor of an integer */
 int greatest_prime_factor(int n);
 
-
 /*! \fn int ***three_dimensional_int_array(int n, int l, int m)
  *  *  \brief Allocate a three dimensional (n x l x m) int array
  *   */
 int ***three_dimensional_int_array(int n, int l, int m);
 
-/*! \fn void deallocate_three_int_dimensional_array(int ***x, int n, int l, int m)
- *  \brief De-allocate a three dimensional (n x l x m) int array.
+/*! \fn void deallocate_three_int_dimensional_array(int ***x, int n, int l, int
+ * m) \brief De-allocate a three dimensional (n x l x m) int array.
  *   */
 void deallocate_three_dimensional_int_array(int ***x, int n, int l, int m);
 
 /* Copy MPI receive buffers on Host to their device locations */
-void copyHostToDeviceReceiveBuffer ( int direction );
+void copyHostToDeviceReceiveBuffer(int direction);
+
+/*!
+ * \brief Split the communicator for each node and return IDs
+ *
+ * \return std::pair<int, int> The rank id and total number of processes
+ */
+std::pair<int, int> MPI_Comm_node();
 
-#endif /*MPI_ROUTINES_H*/
-#endif /*MPI_CHOLLA*/
+  #endif /*MPI_ROUTINES_H*/
+#endif   /*MPI_CHOLLA*/
diff --git a/src/particles/density_CIC.cpp b/src/particles/density_CIC.cpp
index c907e64eb..428a0e864 100644
--- a/src/particles/density_CIC.cpp
+++ b/src/particles/density_CIC.cpp
@@ -1,41 +1,40 @@
 #ifdef PARTICLES
 
-#include <stdio.h>
-#include <stdlib.h>
-#include "math.h"
-#include <iostream>
-#include "../global/global.h"
-#include "../particles/particles_3D.h"
-#include "../grid/grid3D.h"
-#include "../io/io.h"
-
-#ifdef PARALLEL_OMP
-#include "../utils/parallel_omp.h"
-#endif
+  #include <stdio.h>
+  #include <stdlib.h>
 
+  #include <iostream>
 
+  #include "../global/global.h"
+  #include "../grid/grid3D.h"
+  #include "../io/io.h"
+  #include "../particles/particles_3D.h"
+  #include "math.h"
 
-//Get the particles Cloud-In-Cell interpolated density
-void Particles_3D::Get_Density_CIC(){
+  #ifdef PARALLEL_OMP
+    #include "../utils/parallel_omp.h"
+  #endif
 
+// Get the particles Cloud-In-Cell interpolated density
+void Particles3D::Get_Density_CIC()
+{
   #ifdef PARTICLES_CPU
-  #ifdef PARALLEL_OMP
+    #ifdef PARALLEL_OMP
   Get_Density_CIC_OMP();
-  #else
+    #else
   Get_Density_CIC_Serial();
-  #endif //PARALLEL_OMP
+    #endif  // PARALLEL_OMP
   #endif
 
   #ifdef PARTICLES_GPU
   Get_Density_CIC_GPU();
   #endif
-
 }
 
-
-//Compute the particles density and copy it to the array in Grav to compute the potential
-void Grid3D::Copy_Particles_Density_to_Gravity(struct parameters P){
-
+// Compute the particles density and copy it to the array in Grav to compute the
+// potential
+void Grid3D::Copy_Particles_Density_to_Gravity(struct Parameters P)
+{
   #ifdef CPU_TIME
   Timer.Part_Density.Start();
   #endif
@@ -54,53 +53,53 @@ void Grid3D::Copy_Particles_Density_to_Gravity(struct parameters P){
   // Step 2: Transfer Particles CIC density Boundaries
   Transfer_Particles_Density_Boundaries(P);
 
-  //Step 3: Copy Particles density to Gravity array
+  // Step 3: Copy Particles density to Gravity array
   Copy_Particles_Density();
 
   #ifdef CPU_TIME
   Timer.Part_Dens_Transf.End();
   #endif
-
-
 }
 
-//Copy the particles density to the density array in Grav to compute the potential
-void Grid3D::Copy_Particles_Density(){
-
+// Copy the particles density to the density array in Grav to compute the
+// potential
+void Grid3D::Copy_Particles_Density()
+{
   #ifdef GRAVITY_GPU
-  #ifdef PARTICLES_CPU
+    #ifdef PARTICLES_CPU
   Copy_Particles_Density_to_GPU();
-  #endif
+    #endif
   Copy_Particles_Density_GPU();
   #else
 
-  #ifndef PARALLEL_OMP
-  Copy_Particles_Density_function( 0, Grav.nz_local );
-  #else
+    #ifndef PARALLEL_OMP
+  Copy_Particles_Density_function(0, Grav.nz_local);
+    #else
 
-  #pragma omp parallel num_threads( N_OMP_THREADS )
+      #pragma omp parallel num_threads(N_OMP_THREADS)
   {
     int omp_id, n_omp_procs;
     int g_start, g_end;
 
-    omp_id = omp_get_thread_num();
+    omp_id      = omp_get_thread_num();
     n_omp_procs = omp_get_num_threads();
 
-    Get_OMP_Grid_Indxs( Grav.nz_local, n_omp_procs, omp_id, &g_start, &g_end  );
+    Get_OMP_Grid_Indxs(Grav.nz_local, n_omp_procs, omp_id, &g_start, &g_end);
 
-    Copy_Particles_Density_function( g_start, g_end  );
+    Copy_Particles_Density_function(g_start, g_end);
   }
-  #endif//PARALLEL_OMP
+    #endif  // PARALLEL_OMP
 
-  #endif//GRAVITY_GPU
+  #endif  // GRAVITY_GPU
 }
 
-void Grid3D::Copy_Particles_Density_function( int g_start, int g_end ){
+void Grid3D::Copy_Particles_Density_function(int g_start, int g_end)
+{
   int nx_part, ny_part, nz_part, nGHST;
-  nGHST = Particles.G.n_ghost_particles_grid;
-  nx_part = Particles.G.nx_local + 2*nGHST;
-  ny_part = Particles.G.ny_local + 2*nGHST;
-  nz_part = Particles.G.nz_local + 2*nGHST;
+  nGHST   = Particles.G.n_ghost_particles_grid;
+  nx_part = Particles.G.nx_local + 2 * nGHST;
+  ny_part = Particles.G.ny_local + 2 * nGHST;
+  nz_part = Particles.G.nz_local + 2 * nGHST;
 
   int nx_dens, ny_dens, nz_dens;
   nx_dens = Grav.nx_local;
@@ -108,23 +107,22 @@ void Grid3D::Copy_Particles_Density_function( int g_start, int g_end ){
   nz_dens = Grav.nz_local;
 
   int i, j, k, id_CIC, id_grid;
-  for ( k=g_start; k<g_end; k++ ){
-    for ( j=0; j<ny_dens; j++ ){
-      for ( i=0; i<nx_dens; i++ ){
-        id_CIC = (i+nGHST) + (j+nGHST)*nx_part + (k+nGHST)*nx_part*ny_part;
-        id_grid = i + j*nx_dens + k*nx_dens*ny_dens;
+  for (k = g_start; k < g_end; k++) {
+    for (j = 0; j < ny_dens; j++) {
+      for (i = 0; i < nx_dens; i++) {
+        id_CIC                    = (i + nGHST) + (j + nGHST) * nx_part + (k + nGHST) * nx_part * ny_part;
+        id_grid                   = i + j * nx_dens + k * nx_dens * ny_dens;
         Grav.F.density_h[id_grid] = Particles.G.density[id_CIC];
       }
     }
   }
 }
 
-
-//Clear the density array: density=0
-void::Particles_3D::Clear_Density(){
-
+// Clear the density array: density=0
+void ::Particles3D::Clear_Density()
+{
   #ifdef PARTICLES_CPU
-  for( int i=0; i<G.n_cells; i++ ) G.density[i] = 0;
+  for (int i = 0; i < G.n_cells; i++) G.density[i] = 0;
   #endif
 
   #ifdef PARTICLES_GPU
@@ -132,46 +130,44 @@ void::Particles_3D::Clear_Density(){
   #endif
 }
 
-#ifdef PARTICLES_GPU
-
-void Particles_3D::Clear_Density_GPU(){
-
-  Clear_Density_GPU_function( G.density_dev, G.n_cells);
-
-}
-
-void Particles_3D::Get_Density_CIC_GPU(){
+  #ifdef PARTICLES_GPU
 
-  Get_Density_CIC_GPU_function( n_local, particle_mass, G.xMin, G.xMax, G.yMin, G.yMax, G.zMin, G.zMax, G.dx, G.dy, G.dz, G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, G.n_cells, G.density, G.density_dev, pos_x_dev, pos_y_dev, pos_z_dev, mass_dev );
+void Particles3D::Clear_Density_GPU() { Clear_Density_GPU_function(G.density_dev, G.n_cells); }
 
+void Particles3D::Get_Density_CIC_GPU()
+{
+  Get_Density_CIC_GPU_function(n_local, particle_mass, G.xMin, G.xMax, G.yMin, G.yMax, G.zMin, G.zMax, G.dx, G.dy, G.dz,
+                               G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, G.n_cells, G.density,
+                               G.density_dev, pos_x_dev, pos_y_dev, pos_z_dev, mass_dev);
 }
 
-#endif //PARTICLES_GPU
-
+  #endif  // PARTICLES_GPU
 
-#ifdef PARTICLES_CPU
-//Get the CIC index from the particle position
-void Get_Indexes_CIC( Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, Real pos_x, Real pos_y, Real pos_z, int &indx_x, int &indx_y, int &indx_z ){
-  indx_x = (int) floor( ( pos_x - xMin - 0.5*dx ) / dx );
-  indx_y = (int) floor( ( pos_y - yMin - 0.5*dy ) / dy );
-  indx_z = (int) floor( ( pos_z - zMin - 0.5*dz ) / dz );
+  #ifdef PARTICLES_CPU
+// Get the CIC index from the particle position
+void Get_Indexes_CIC(Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, Real pos_x, Real pos_y, Real pos_z,
+                     int &indx_x, int &indx_y, int &indx_z)
+{
+  indx_x = (int)floor((pos_x - xMin - 0.5 * dx) / dx);
+  indx_y = (int)floor((pos_y - yMin - 0.5 * dy) / dy);
+  indx_z = (int)floor((pos_z - zMin - 0.5 * dz) / dz);
 }
 
-
-//Comute the CIC density (NO OpenMP)
-void Particles_3D::Get_Density_CIC_Serial( ){
+// Comute the CIC density (NO OpenMP)
+void Particles3D::Get_Density_CIC_Serial()
+{
   int nGHST = G.n_ghost_particles_grid;
-  int nx_g = G.nx_local + 2*nGHST;
-  int ny_g = G.ny_local + 2*nGHST;
-  int nz_g = G.nz_local + 2*nGHST;
+  int nx_g  = G.nx_local + 2 * nGHST;
+  int ny_g  = G.ny_local + 2 * nGHST;
+  int nz_g  = G.nz_local + 2 * nGHST;
 
   Real xMin, yMin, zMin, dx, dy, dz;
   xMin = G.xMin;
   yMin = G.yMin;
   zMin = G.zMin;
-  dx = G.dx;
-  dy = G.dy;
-  dz = G.dz;
+  dx   = G.dx;
+  dy   = G.dy;
+  dz   = G.dz;
 
   part_int_t pIndx;
   int indx_x, indx_y, indx_z, indx;
@@ -179,11 +175,11 @@ void Particles_3D::Get_Density_CIC_Serial( ){
 
   Real cell_center_x, cell_center_y, cell_center_z;
   Real delta_x, delta_y, delta_z;
-  Real dV_inv = 1./(G.dx*G.dy*G.dz);
+  Real dV_inv = 1. / (G.dx * G.dy * G.dz);
   bool ignore, in_local;
 
-  for ( pIndx=0; pIndx < n_local; pIndx++ ){
-    ignore = false;
+  for (pIndx = 0; pIndx < n_local; pIndx++) {
+    ignore   = false;
     in_local = true;
 
     #ifdef SINGLE_PARTICLE_MASS
@@ -194,37 +190,37 @@ void Particles_3D::Get_Density_CIC_Serial( ){
     x_pos = pos_x[pIndx];
     y_pos = pos_y[pIndx];
     z_pos = pos_z[pIndx];
-    Get_Indexes_CIC( xMin, yMin, zMin, dx, dy, dz, x_pos, y_pos, z_pos, indx_x, indx_y, indx_z );
-    if ( indx_x < -1 ) ignore = true;
-    if ( indx_y < -1 ) ignore = true;
-    if ( indx_z < -1 ) ignore = true;
-    if ( indx_x > nx_g-3  ) ignore = true;
-    if ( indx_y > ny_g-3  ) ignore = true;
-    if ( indx_y > nz_g-3  ) ignore = true;
-    if ( x_pos < G.xMin || x_pos >= G.xMax ) in_local = false;
-    if ( y_pos < G.yMin || y_pos >= G.yMax ) in_local = false;
-    if ( z_pos < G.zMin || z_pos >= G.zMax ) in_local = false;
-    if ( ! in_local  ) {
+    Get_Indexes_CIC(xMin, yMin, zMin, dx, dy, dz, x_pos, y_pos, z_pos, indx_x, indx_y, indx_z);
+    if (indx_x < -1) ignore = true;
+    if (indx_y < -1) ignore = true;
+    if (indx_z < -1) ignore = true;
+    if (indx_x > nx_g - 3) ignore = true;
+    if (indx_y > ny_g - 3) ignore = true;
+    if (indx_y > nz_g - 3) ignore = true;
+    if (x_pos < G.xMin || x_pos >= G.xMax) in_local = false;
+    if (y_pos < G.yMin || y_pos >= G.yMax) in_local = false;
+    if (z_pos < G.zMin || z_pos >= G.zMax) in_local = false;
+    if (!in_local) {
       std::cout << " Density CIC Error:" << std::endl;
-      #ifdef PARTICLE_IDS
+    #ifdef PARTICLE_IDS
       std::cout << " Particle outside Local  domain    pID: " << partIDs[pIndx] << std::endl;
-      #else
+    #else
       std::cout << " Particle outside Local  domain " << std::endl;
-      #endif
-      std::cout << "  Domain X: " << G.xMin <<  "  " << G.xMax << std::endl;
-      std::cout << "  Domain Y: " << G.yMin <<  "  " << G.yMax << std::endl;
-      std::cout << "  Domain Z: " << G.zMin <<  "  " << G.zMax << std::endl;
+    #endif
+      std::cout << "  Domain X: " << G.xMin << "  " << G.xMax << std::endl;
+      std::cout << "  Domain Y: " << G.yMin << "  " << G.yMax << std::endl;
+      std::cout << "  Domain Z: " << G.zMin << "  " << G.zMax << std::endl;
       std::cout << "  Particle X: " << x_pos << std::endl;
       std::cout << "  Particle Y: " << y_pos << std::endl;
       std::cout << "  Particle Z: " << z_pos << std::endl;
-    continue;
+      continue;
     }
-    if ( ignore ){
-      #ifdef PARTICLE_IDS
+    if (ignore) {
+    #ifdef PARTICLE_IDS
       std::cout << "ERROR Density CIC Index    pID: " << partIDs[pIndx] << std::endl;
-      #else
+    #else
       std::cout << "ERROR Density CIC Index " << std::endl;
-      #endif
+    #endif
       std::cout << "Negative xIndx: " << x_pos << "  " << indx_x << std::endl;
       std::cout << "Negative zIndx: " << z_pos << "  " << indx_z << std::endl;
       std::cout << "Negative yIndx: " << y_pos << "  " << indx_y << std::endl;
@@ -235,75 +231,71 @@ void Particles_3D::Get_Density_CIC_Serial( ){
       // exit(-1);
       continue;
     }
-    cell_center_x = xMin + indx_x*dx + 0.5*dx;
-    cell_center_y = yMin + indx_y*dy + 0.5*dy;
-    cell_center_z = zMin + indx_z*dz + 0.5*dz;
-    delta_x = 1 - ( x_pos - cell_center_x ) / dx;
-    delta_y = 1 - ( y_pos - cell_center_y ) / dy;
-    delta_z = 1 - ( z_pos - cell_center_z ) / dz;
+    cell_center_x = xMin + indx_x * dx + 0.5 * dx;
+    cell_center_y = yMin + indx_y * dy + 0.5 * dy;
+    cell_center_z = zMin + indx_z * dz + 0.5 * dz;
+    delta_x       = 1 - (x_pos - cell_center_x) / dx;
+    delta_y       = 1 - (y_pos - cell_center_y) / dy;
+    delta_z       = 1 - (z_pos - cell_center_z) / dz;
     indx_x += nGHST;
     indx_y += nGHST;
     indx_z += nGHST;
 
-    indx = indx_x + indx_y*nx_g + indx_z*nx_g*ny_g;
-    G.density[indx] += pMass  * delta_x * delta_y * delta_z;
+    indx = indx_x + indx_y * nx_g + indx_z * nx_g * ny_g;
+    G.density[indx] += pMass * delta_x * delta_y * delta_z;
 
-    indx = (indx_x+1) + indx_y*nx_g + indx_z*nx_g*ny_g;
-    G.density[indx] += pMass  * (1-delta_x) * delta_y * delta_z;
+    indx = (indx_x + 1) + indx_y * nx_g + indx_z * nx_g * ny_g;
+    G.density[indx] += pMass * (1 - delta_x) * delta_y * delta_z;
 
-    indx = indx_x + (indx_y+1)*nx_g + indx_z*nx_g*ny_g;
-    G.density[indx] += pMass  * delta_x * (1-delta_y) * delta_z;
+    indx = indx_x + (indx_y + 1) * nx_g + indx_z * nx_g * ny_g;
+    G.density[indx] += pMass * delta_x * (1 - delta_y) * delta_z;
 
-    indx = indx_x + indx_y*nx_g + (indx_z+1)*nx_g*ny_g;
-    G.density[indx] += pMass  * delta_x * delta_y * (1-delta_z);
+    indx = indx_x + indx_y * nx_g + (indx_z + 1) * nx_g * ny_g;
+    G.density[indx] += pMass * delta_x * delta_y * (1 - delta_z);
 
-    indx = (indx_x+1) + (indx_y+1)*nx_g + indx_z*nx_g*ny_g;
-    G.density[indx] += pMass  * (1-delta_x) * (1-delta_y) * delta_z;
+    indx = (indx_x + 1) + (indx_y + 1) * nx_g + indx_z * nx_g * ny_g;
+    G.density[indx] += pMass * (1 - delta_x) * (1 - delta_y) * delta_z;
 
-    indx = (indx_x+1) + indx_y*nx_g + (indx_z+1)*nx_g*ny_g;
-    G.density[indx] += pMass  * (1-delta_x) * delta_y * (1-delta_z);
+    indx = (indx_x + 1) + indx_y * nx_g + (indx_z + 1) * nx_g * ny_g;
+    G.density[indx] += pMass * (1 - delta_x) * delta_y * (1 - delta_z);
 
-    indx = indx_x + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g;
-    G.density[indx] += pMass  * delta_x * (1-delta_y) * (1-delta_z);
+    indx = indx_x + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g;
+    G.density[indx] += pMass * delta_x * (1 - delta_y) * (1 - delta_z);
 
-    indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g;
-    G.density[indx] += pMass * (1-delta_x) * (1-delta_y) * (1-delta_z);
+    indx = (indx_x + 1) + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g;
+    G.density[indx] += pMass * (1 - delta_x) * (1 - delta_y) * (1 - delta_z);
   }
 }
 
-
-
-#ifdef PARALLEL_OMP
-//Compute the CIC density when PARALLEL_OMP
-void Particles_3D::Get_Density_CIC_OMP( ){
-
-
-  //Span OpenMP threads
-  #pragma omp parallel num_threads( N_OMP_THREADS )
+    #ifdef PARALLEL_OMP
+// Compute the CIC density when PARALLEL_OMP
+void Particles3D::Get_Density_CIC_OMP()
+{
+      // Span OpenMP threads
+      #pragma omp parallel num_threads(N_OMP_THREADS)
   {
     int omp_id;
     int g_start, g_end;
     int n_omp_procs;
 
-    omp_id = omp_get_thread_num();
+    omp_id      = omp_get_thread_num();
     n_omp_procs = omp_get_num_threads();
 
     int nGHST = G.n_ghost_particles_grid;
-    int nx_g = G.nx_local + 2*nGHST;
-    int ny_g = G.ny_local + 2*nGHST;
-    int nz_g = G.nz_local + 2*nGHST;
+    int nx_g  = G.nx_local + 2 * nGHST;
+    int ny_g  = G.ny_local + 2 * nGHST;
+    int nz_g  = G.nz_local + 2 * nGHST;
 
     Real xMin, yMin, zMin, dx, dy, dz;
-    xMin = G.xMin;
-    yMin = G.yMin;
-    zMin = G.zMin;
-    dx = G.dx;
-    dy = G.dy;
-    dz = G.dz;
-    Real dV_inv = 1./(G.dx*G.dy*G.dz);
-
+    xMin        = G.xMin;
+    yMin        = G.yMin;
+    zMin        = G.zMin;
+    dx          = G.dx;
+    dy          = G.dy;
+    dz          = G.dz;
+    Real dV_inv = 1. / (G.dx * G.dy * G.dz);
 
-    Get_OMP_Grid_Indxs( nz_g, n_omp_procs, omp_id,  &g_start, &g_end );
+    Get_OMP_Grid_Indxs(nz_g, n_omp_procs, omp_id, &g_start, &g_end);
 
     part_int_t pIndx;
     int indx_x, indx_y, indx_z, indx;
@@ -314,37 +306,37 @@ void Particles_3D::Get_Density_CIC_OMP( ){
     bool ignore, in_local;
     bool add_1, add_2;
 
-    for ( pIndx=0; pIndx < n_local; pIndx++ ){
+    for (pIndx = 0; pIndx < n_local; pIndx++) {
       add_1 = false;
       add_2 = false;
 
-      z_pos = pos_z[pIndx];
-      indx_z = (int) floor( ( z_pos - zMin - 0.5*dz ) / dz );
+      z_pos  = pos_z[pIndx];
+      indx_z = (int)floor((z_pos - zMin - 0.5 * dz) / dz);
       indx_z += nGHST;
-      if ( (indx_z >= g_start) && (indx_z < g_end) ) add_1 = true;
-      if ( ((indx_z+1) >= g_start) && ((indx_z+1) < g_end) ) add_2 = true;
-      if (!( add_1 || add_2) ) continue;
+      if ((indx_z >= g_start) && (indx_z < g_end)) add_1 = true;
+      if (((indx_z + 1) >= g_start) && ((indx_z + 1) < g_end)) add_2 = true;
+      if (!(add_1 || add_2)) continue;
 
       ignore = false;
-      x_pos = pos_x[pIndx];
-      y_pos = pos_y[pIndx];
+      x_pos  = pos_x[pIndx];
+      y_pos  = pos_y[pIndx];
 
-      indx_x = (int) floor( ( x_pos - xMin - 0.5*dx ) / dx );
-      indx_y = (int) floor( ( y_pos - yMin - 0.5*dy ) / dy );
+      indx_x = (int)floor((x_pos - xMin - 0.5 * dx) / dx);
+      indx_y = (int)floor((y_pos - yMin - 0.5 * dy) / dy);
       indx_z -= nGHST;
 
-      if ( indx_x < -1 ) ignore = true;
-      if ( indx_y < -1 ) ignore = true;
-      if ( indx_z < -1 ) ignore = true;
-      if ( indx_x > nx_g-3  ) ignore = true;
-      if ( indx_y > ny_g-3  ) ignore = true;
-      if ( indx_y > nz_g-3  ) ignore = true;
-      if ( ignore ){
-        #ifdef PARTICLE_IDS
+      if (indx_x < -1) ignore = true;
+      if (indx_y < -1) ignore = true;
+      if (indx_z < -1) ignore = true;
+      if (indx_x > nx_g - 3) ignore = true;
+      if (indx_y > ny_g - 3) ignore = true;
+      if (indx_y > nz_g - 3) ignore = true;
+      if (ignore) {
+      #ifdef PARTICLE_IDS
         std::cout << "ERROR CIC Index    pID: " << partIDs[pIndx] << std::endl;
-        #else
+      #else
         std::cout << "ERROR CIC Index " << std::endl;
-        #endif
+      #endif
         std::cout << "Negative xIndx: " << x_pos << "  " << indx_x << std::endl;
         std::cout << "Negative zIndx: " << z_pos << "  " << indx_z << std::endl;
         std::cout << "Negative yIndx: " << y_pos << "  " << indx_y << std::endl;
@@ -356,23 +348,23 @@ void Particles_3D::Get_Density_CIC_OMP( ){
         continue;
       }
       in_local = true;
-      if ( x_pos < G.xMin || x_pos >= G.xMax ) in_local = false;
-      if ( y_pos < G.yMin || y_pos >= G.yMax ) in_local = false;
-      if ( z_pos < G.zMin || z_pos >= G.zMax ) in_local = false;
-      if ( ! in_local  ) {
+      if (x_pos < G.xMin || x_pos >= G.xMax) in_local = false;
+      if (y_pos < G.yMin || y_pos >= G.yMax) in_local = false;
+      if (z_pos < G.zMin || z_pos >= G.zMax) in_local = false;
+      if (!in_local) {
         std::cout << " Density CIC Error:" << std::endl;
-        #ifdef PARTICLE_IDS
+      #ifdef PARTICLE_IDS
         std::cout << " Particle outside Local  domain    pID: " << partIDs[pIndx] << std::endl;
-        #else
+      #else
         std::cout << " Particle outside Local  domain " << std::endl;
-        #endif
-        std::cout << "  Domain X: " << G.xMin <<  "  " << G.xMax << std::endl;
-        std::cout << "  Domain Y: " << G.yMin <<  "  " << G.yMax << std::endl;
-        std::cout << "  Domain Z: " << G.zMin <<  "  " << G.zMax << std::endl;
+      #endif
+        std::cout << "  Domain X: " << G.xMin << "  " << G.xMax << std::endl;
+        std::cout << "  Domain Y: " << G.yMin << "  " << G.yMax << std::endl;
+        std::cout << "  Domain Z: " << G.zMin << "  " << G.zMax << std::endl;
         std::cout << "  Particle X: " << x_pos << std::endl;
         std::cout << "  Particle Y: " << y_pos << std::endl;
         std::cout << "  Particle Z: " << z_pos << std::endl;
-      continue;
+        continue;
       }
 
       #ifdef SINGLE_PARTICLE_MASS
@@ -381,49 +373,48 @@ void Particles_3D::Get_Density_CIC_OMP( ){
       pMass = mass[pIndx] * dV_inv;
       #endif
 
-      cell_center_x = xMin + indx_x*dx + 0.5*dx;
-      cell_center_y = yMin + indx_y*dy + 0.5*dy;
-      cell_center_z = zMin + indx_z*dz + 0.5*dz;
-      delta_x = 1 - ( x_pos - cell_center_x ) / dx;
-      delta_y = 1 - ( y_pos - cell_center_y ) / dy;
-      delta_z = 1 - ( z_pos - cell_center_z ) / dz;
+      cell_center_x = xMin + indx_x * dx + 0.5 * dx;
+      cell_center_y = yMin + indx_y * dy + 0.5 * dy;
+      cell_center_z = zMin + indx_z * dz + 0.5 * dz;
+      delta_x       = 1 - (x_pos - cell_center_x) / dx;
+      delta_y       = 1 - (y_pos - cell_center_y) / dy;
+      delta_z       = 1 - (z_pos - cell_center_z) / dz;
       indx_x += nGHST;
       indx_y += nGHST;
       indx_z += nGHST;
 
-      if ( add_1 ){
-        indx = indx_x + indx_y*nx_g + indx_z*nx_g*ny_g;
-        G.density[indx] += pMass  * delta_x * delta_y * delta_z;
+      if (add_1) {
+        indx = indx_x + indx_y * nx_g + indx_z * nx_g * ny_g;
+        G.density[indx] += pMass * delta_x * delta_y * delta_z;
 
-        indx = (indx_x+1) + indx_y*nx_g + indx_z*nx_g*ny_g;
-        G.density[indx] += pMass  * (1-delta_x) * delta_y * delta_z;
+        indx = (indx_x + 1) + indx_y * nx_g + indx_z * nx_g * ny_g;
+        G.density[indx] += pMass * (1 - delta_x) * delta_y * delta_z;
 
-        indx = indx_x + (indx_y+1)*nx_g + indx_z*nx_g*ny_g;
-        G.density[indx] += pMass  * delta_x * (1-delta_y) * delta_z;
+        indx = indx_x + (indx_y + 1) * nx_g + indx_z * nx_g * ny_g;
+        G.density[indx] += pMass * delta_x * (1 - delta_y) * delta_z;
 
-        indx = (indx_x+1) + (indx_y+1)*nx_g + indx_z*nx_g*ny_g;
-        G.density[indx] += pMass  * (1-delta_x) * (1-delta_y) * delta_z;
+        indx = (indx_x + 1) + (indx_y + 1) * nx_g + indx_z * nx_g * ny_g;
+        G.density[indx] += pMass * (1 - delta_x) * (1 - delta_y) * delta_z;
       }
 
-      if ( add_2 ){
-        indx = indx_x + indx_y*nx_g + (indx_z+1)*nx_g*ny_g;
-        G.density[indx] += pMass  * delta_x * delta_y * (1-delta_z);
+      if (add_2) {
+        indx = indx_x + indx_y * nx_g + (indx_z + 1) * nx_g * ny_g;
+        G.density[indx] += pMass * delta_x * delta_y * (1 - delta_z);
 
-        indx = (indx_x+1) + indx_y*nx_g + (indx_z+1)*nx_g*ny_g;
-        G.density[indx] += pMass  * (1-delta_x) * delta_y * (1-delta_z);
+        indx = (indx_x + 1) + indx_y * nx_g + (indx_z + 1) * nx_g * ny_g;
+        G.density[indx] += pMass * (1 - delta_x) * delta_y * (1 - delta_z);
 
-        indx = indx_x + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g;
-        G.density[indx] += pMass  * delta_x * (1-delta_y) * (1-delta_z);
+        indx = indx_x + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g;
+        G.density[indx] += pMass * delta_x * (1 - delta_y) * (1 - delta_z);
 
-        indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g;
-        G.density[indx] += pMass * (1-delta_x) * (1-delta_y) * (1-delta_z);
+        indx = (indx_x + 1) + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g;
+        G.density[indx] += pMass * (1 - delta_x) * (1 - delta_y) * (1 - delta_z);
       }
     }
   }
 }
-#endif //PARALLEL_OMP
-
-#endif //PARTICLES_CPU
+    #endif  // PARALLEL_OMP
 
+  #endif  // PARTICLES_CPU
 
 #endif
diff --git a/src/particles/density_CIC.h b/src/particles/density_CIC.h
index 393c99a6a..b7181e68d 100644
--- a/src/particles/density_CIC.h
+++ b/src/particles/density_CIC.h
@@ -1,9 +1,10 @@
 #ifdef PARTICLES
 
-#ifndef DENSITY_CIC_H
-#define DENSITY_CIC_H
+  #ifndef DENSITY_CIC_H
+    #define DENSITY_CIC_H
 
-void Get_Indexes_CIC( Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, Real pos_x, Real pos_y, Real pos_z, int &indx_x, int &indx_y, int &indx_z );
+void Get_Indexes_CIC(Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, Real pos_x, Real pos_y, Real pos_z,
+                     int &indx_x, int &indx_y, int &indx_z);
 
-#endif
+  #endif
 #endif
\ No newline at end of file
diff --git a/src/particles/density_CIC_gpu.cu b/src/particles/density_CIC_gpu.cu
index 977f84421..756c48643 100644
--- a/src/particles/density_CIC_gpu.cu
+++ b/src/particles/density_CIC_gpu.cu
@@ -1,143 +1,161 @@
 #ifdef PARTICLES
 
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include "../utils/gpu.hpp"
-#include "../global/global.h"
-#include "../global/global_cuda.h"
-#include "../particles/particles_3D.h"
-#include "../grid/grid3D.h"
-
-#ifdef GRAVITY_GPU
-void Grid3D::Copy_Particles_Density_to_GPU(){
-  CudaSafeCall( cudaMemcpy(Particles.G.density_dev, Particles.G.density, Particles.G.n_cells*sizeof(Real), cudaMemcpyHostToDevice) );
+  #include <math.h>
+  #include <stdio.h>
+  #include <stdlib.h>
+  #include <unistd.h>
+
+  #include "../global/global.h"
+  #include "../global/global_cuda.h"
+  #include "../grid/grid3D.h"
+  #include "../particles/particles_3D.h"
+  #include "../utils/gpu.hpp"
+
+  #ifdef GRAVITY_GPU
+void Grid3D::Copy_Particles_Density_to_GPU()
+{
+  GPU_Error_Check(cudaMemcpy(Particles.G.density_dev, Particles.G.density, Particles.G.n_cells * sizeof(Real),
+                             cudaMemcpyHostToDevice));
 }
 
-#endif
+  #endif
 
-#ifdef PARTICLES_GPU
+  #ifdef PARTICLES_GPU
 
-//Define atomic_add if it's not supported
-#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
-#else
-__device__ double atomicAdd(double* address, double val)
+    // Define atomic_add if it's not supported
+    #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
+    #else
+__device__ double atomicAdd(double *address, double val)
 {
-    unsigned long long int* address_as_ull = (unsigned long long int*)address;
-    unsigned long long int old = *address_as_ull, assumed;
-    do {
-        assumed = old;
-        old = atomicCAS(address_as_ull, assumed,
-                __double_as_longlong(val + __longlong_as_double(assumed)));
-    } while (assumed != old);
-    return __longlong_as_double(old);
+  unsigned long long int *address_as_ull = (unsigned long long int *)address;
+  unsigned long long int old             = *address_as_ull, assumed;
+  do {
+    assumed = old;
+    old     = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed)));
+  } while (assumed != old);
+  return __longlong_as_double(old);
 }
-#endif
+    #endif
 
-//Get the CIC index from the particle position ( device function )
-__device__ void Get_Indexes_CIC( Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, Real pos_x, Real pos_y, Real pos_z, int &indx_x, int &indx_y, int &indx_z ){
-  indx_x = (int) floor( ( pos_x - xMin - 0.5*dx ) / dx );
-  indx_y = (int) floor( ( pos_y - yMin - 0.5*dy ) / dy );
-  indx_z = (int) floor( ( pos_z - zMin - 0.5*dz ) / dz );
+// Get the CIC index from the particle position ( device function )
+__device__ void Get_Indexes_CIC(Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, Real pos_x, Real pos_y,
+                                Real pos_z, int &indx_x, int &indx_y, int &indx_z)
+{
+  indx_x = (int)floor((pos_x - xMin - 0.5 * dx) / dx);
+  indx_y = (int)floor((pos_y - yMin - 0.5 * dy) / dy);
+  indx_z = (int)floor((pos_z - zMin - 0.5 * dz) / dz);
 }
 
-//CUDA Kernel to compute the CIC density from the particles positions
-__global__ void Get_Density_CIC_Kernel( part_int_t n_local, Real particle_mass,  Real *density_dev, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *mass_dev, Real xMin, Real yMin, Real zMin, Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, int nx, int ny, int nz, int n_ghost  ){
-
-  int tid = blockIdx.x * blockDim.x + threadIdx.x ;
-  if ( tid >= n_local) return;
+// CUDA Kernel to compute the CIC density from the particles positions
+__global__ void Get_Density_CIC_Kernel(part_int_t n_local, Real particle_mass, Real *density_dev, Real *pos_x_dev,
+                                       Real *pos_y_dev, Real *pos_z_dev, Real *mass_dev, Real xMin, Real yMin,
+                                       Real zMin, Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, int nx,
+                                       int ny, int nz, int n_ghost)
+{
+  int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  if (tid >= n_local) {
+    return;
+  }
 
   int nx_g, ny_g;
-  nx_g = nx + 2*n_ghost;
-  ny_g = ny + 2*n_ghost;
+  nx_g = nx + 2 * n_ghost;
+  ny_g = ny + 2 * n_ghost;
 
   Real pos_x, pos_y, pos_z, pMass;
   Real cell_center_x, cell_center_y, cell_center_z;
   Real delta_x, delta_y, delta_z;
-  Real dV_inv = 1./(dx*dy*dz);
+  Real dV_inv = 1. / (dx * dy * dz);
 
   pos_x = pos_x_dev[tid];
   pos_y = pos_y_dev[tid];
   pos_z = pos_z_dev[tid];
 
-  #ifdef SINGLE_PARTICLE_MASS
+    #ifdef SINGLE_PARTICLE_MASS
   pMass = particle_mass * dV_inv;
-  #else
+    #else
   pMass = mass_dev[tid] * dV_inv;
-  #endif
+    #endif
 
   int indx_x, indx_y, indx_z, indx;
-  Get_Indexes_CIC( xMin, yMin, zMin, dx, dy, dz, pos_x, pos_y, pos_z, indx_x, indx_y, indx_z );
+  Get_Indexes_CIC(xMin, yMin, zMin, dx, dy, dz, pos_x, pos_y, pos_z, indx_x, indx_y, indx_z);
 
   bool in_local = true;
 
-  if ( pos_x < xMin || pos_x >= xMax ) in_local = false;
-  if ( pos_y < yMin || pos_y >= yMax ) in_local = false;
-  if ( pos_z < zMin || pos_z >= zMax ) in_local = false;
-  if ( ! in_local  ) {
-    printf(" Density CIC Error: Particle outside local domain [%f  %f  %f]  [%f %f] [%f %f] [%f %f]\n ", pos_x, pos_y, pos_z, xMin, xMax, yMin, yMax, zMin, zMax);
+  if (pos_x < xMin || pos_x >= xMax) {
+    in_local = false;
+  }
+  if (pos_y < yMin || pos_y >= yMax) {
+    in_local = false;
+  }
+  if (pos_z < zMin || pos_z >= zMax) {
+    in_local = false;
+  }
+  if (!in_local) {
+    printf(
+        " Density CIC Error: Particle outside local domain [%f  %f  %f]  [%f "
+        "%f] [%f %f] [%f %f]\n ",
+        pos_x, pos_y, pos_z, xMin, xMax, yMin, yMax, zMin, zMax);
     return;
   }
 
-  cell_center_x = xMin + indx_x*dx + 0.5*dx;
-  cell_center_y = yMin + indx_y*dy + 0.5*dy;
-  cell_center_z = zMin + indx_z*dz + 0.5*dz;
-  delta_x = 1 - ( pos_x - cell_center_x ) / dx;
-  delta_y = 1 - ( pos_y - cell_center_y ) / dy;
-  delta_z = 1 - ( pos_z - cell_center_z ) / dz;
+  cell_center_x = xMin + indx_x * dx + 0.5 * dx;
+  cell_center_y = yMin + indx_y * dy + 0.5 * dy;
+  cell_center_z = zMin + indx_z * dz + 0.5 * dz;
+  delta_x       = 1 - (pos_x - cell_center_x) / dx;
+  delta_y       = 1 - (pos_y - cell_center_y) / dy;
+  delta_z       = 1 - (pos_z - cell_center_z) / dz;
   indx_x += n_ghost;
   indx_y += n_ghost;
   indx_z += n_ghost;
 
-
-  indx = indx_x + indx_y*nx_g + indx_z*nx_g*ny_g;
+  indx = indx_x + indx_y * nx_g + indx_z * nx_g * ny_g;
   // density_dev[indx] += pMass  * delta_x * delta_y * delta_z;
-  atomicAdd( &density_dev[indx],  pMass  * delta_x * delta_y * delta_z);
+  atomicAdd(&density_dev[indx], pMass * delta_x * delta_y * delta_z);
 
-  indx = (indx_x+1) + indx_y*nx_g + indx_z*nx_g*ny_g;
+  indx = (indx_x + 1) + indx_y * nx_g + indx_z * nx_g * ny_g;
   // density_dev[indx] += pMass  * (1-delta_x) * delta_y * delta_z;
-  atomicAdd( &density_dev[indx], pMass  * (1-delta_x) * delta_y * delta_z);
+  atomicAdd(&density_dev[indx], pMass * (1 - delta_x) * delta_y * delta_z);
 
-  indx = indx_x + (indx_y+1)*nx_g + indx_z*nx_g*ny_g;
+  indx = indx_x + (indx_y + 1) * nx_g + indx_z * nx_g * ny_g;
   // density_dev[indx] += pMass  * delta_x * (1-delta_y) * delta_z;
-  atomicAdd( &density_dev[indx], pMass  * delta_x * (1-delta_y) * delta_z);
+  atomicAdd(&density_dev[indx], pMass * delta_x * (1 - delta_y) * delta_z);
   //
-  indx = indx_x + indx_y*nx_g + (indx_z+1)*nx_g*ny_g;
+  indx = indx_x + indx_y * nx_g + (indx_z + 1) * nx_g * ny_g;
   // density_dev[indx] += pMass  * delta_x * delta_y * (1-delta_z);
-  atomicAdd( &density_dev[indx], pMass  * delta_x * delta_y * (1-delta_z) );
+  atomicAdd(&density_dev[indx], pMass * delta_x * delta_y * (1 - delta_z));
 
-  indx = (indx_x+1) + (indx_y+1)*nx_g + indx_z*nx_g*ny_g;
+  indx = (indx_x + 1) + (indx_y + 1) * nx_g + indx_z * nx_g * ny_g;
   // density_dev[indx] += pMass  * (1-delta_x) * (1-delta_y) * delta_z;
-  atomicAdd( &density_dev[indx], pMass  * (1-delta_x) * (1-delta_y) * delta_z);
+  atomicAdd(&density_dev[indx], pMass * (1 - delta_x) * (1 - delta_y) * delta_z);
 
-  indx = (indx_x+1) + indx_y*nx_g + (indx_z+1)*nx_g*ny_g;
+  indx = (indx_x + 1) + indx_y * nx_g + (indx_z + 1) * nx_g * ny_g;
   // density_dev[indx] += pMass  * (1-delta_x) * delta_y * (1-delta_z);
-  atomicAdd( &density_dev[indx], pMass  * (1-delta_x) * delta_y * (1-delta_z));
+  atomicAdd(&density_dev[indx], pMass * (1 - delta_x) * delta_y * (1 - delta_z));
 
-  indx = indx_x + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g;
+  indx = indx_x + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g;
   // density_dev[indx] += pMass  * delta_x * (1-delta_y) * (1-delta_z);
-  atomicAdd( &density_dev[indx], pMass  * delta_x * (1-delta_y) * (1-delta_z));
+  atomicAdd(&density_dev[indx], pMass * delta_x * (1 - delta_y) * (1 - delta_z));
 
-  indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g;
+  indx = (indx_x + 1) + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g;
   // density_dev[indx] += pMass * (1-delta_x) * (1-delta_y) * (1-delta_z);
-  atomicAdd( &density_dev[indx], pMass * (1-delta_x) * (1-delta_y) * (1-delta_z));
-
+  atomicAdd(&density_dev[indx], pMass * (1 - delta_x) * (1 - delta_y) * (1 - delta_z));
 }
 
-
-
-//Clear the density array: density=0
-void Particles_3D::Clear_Density_GPU_function( Real *density_dev, int n_cells){
-  Set_Particles_Array_Real( 0.0, density_dev, n_cells);
+// Clear the density array: density=0
+void Particles3D::Clear_Density_GPU_function(Real *density_dev, int n_cells)
+{
+  Set_Particles_Array_Real(0.0, density_dev, n_cells);
 }
 
-
-//Call the CIC density kernel to get the particles density
-void Particles_3D::Get_Density_CIC_GPU_function(part_int_t n_local, Real particle_mass,  Real xMin, Real xMax, Real yMin, Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, int n_cells, Real *density_h, Real *density_dev, Real *pos_x_dev, Real *pos_y_dev , Real *pos_z_dev, Real *mass_dev){
-
+// Call the CIC density kernel to get the particles density
+void Particles3D::Get_Density_CIC_GPU_function(part_int_t n_local, Real particle_mass, Real xMin, Real xMax, Real yMin,
+                                               Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, int nx_local,
+                                               int ny_local, int nz_local, int n_ghost_particles_grid, int n_cells,
+                                               Real *density_h, Real *density_dev, Real *pos_x_dev, Real *pos_y_dev,
+                                               Real *pos_z_dev, Real *mass_dev)
+{
   // set values for GPU kernels
-  int ngrid =  (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES;
+  int ngrid = (n_local - 1) / TPB_PARTICLES + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(ngrid, 1, 1);
   //  number of threads per 1D block
@@ -145,16 +163,18 @@ void Particles_3D::Get_Density_CIC_GPU_function(part_int_t n_local, Real particl
 
   // Only runs if there are local particles
   if (n_local > 0) {
-    hipLaunchKernelGGL(Get_Density_CIC_Kernel, dim1dGrid, dim1dBlock, 0, 0,  n_local, particle_mass, density_dev, pos_x_dev, pos_y_dev, pos_z_dev, mass_dev, xMin, yMin, zMin, xMax, yMax, zMax, dx, dy, dz, nx_local, ny_local, nz_local, n_ghost_particles_grid );
-    CudaCheckError();
+    hipLaunchKernelGGL(Get_Density_CIC_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, particle_mass, density_dev,
+                       pos_x_dev, pos_y_dev, pos_z_dev, mass_dev, xMin, yMin, zMin, xMax, yMax, zMax, dx, dy, dz,
+                       nx_local, ny_local, nz_local, n_ghost_particles_grid);
+    GPU_Error_Check();
     cudaDeviceSynchronize();
   }
 
-  #if !defined(GRAVITY_GPU)
-  //Copy the density from device to host
-  CudaSafeCall( cudaMemcpy(density_h, density_dev, n_cells*sizeof(Real), cudaMemcpyDeviceToHost) );
-  #endif
+    #if !defined(GRAVITY_GPU)
+  // Copy the density from device to host
+  GPU_Error_Check(cudaMemcpy(density_h, density_dev, n_cells * sizeof(Real), cudaMemcpyDeviceToHost));
+    #endif
 }
 
-#endif//PARTICLES_GPU
-#endif//PARTICLES
+  #endif  // PARTICLES_GPU
+#endif    // PARTICLES
diff --git a/src/particles/density_boundaries.cpp b/src/particles/density_boundaries.cpp
index 3e5d56d7b..6884e99cd 100644
--- a/src/particles/density_boundaries.cpp
+++ b/src/particles/density_boundaries.cpp
@@ -1,128 +1,134 @@
 #ifdef PARTICLES
 
-#include "../io/io.h"
-#include "../grid/grid3D.h"
-#include "../particles/particles_3D.h"
-#include <iostream>
+  #include <iostream>
 
-//Copy the particles density boundaries for non-MPI PERIODIC transfers
-void Grid3D::Set_Particles_Density_Boundaries_Periodic( int direction, int side ){
+  #include "../grid/grid3D.h"
+  #include "../io/io.h"
+  #include "particles_3D.h"
 
+// Copy the particles density boundaries for non-MPI PERIODIC transfers
+void Grid3D::Set_Particles_Density_Boundaries_Periodic(int direction, int side)
+{
   int i, j, k, indx_src, indx_dst;
   int nGHST, nx_g, ny_g, nz_g;
   nGHST = Particles.G.n_ghost_particles_grid;
-  nx_g = Particles.G.nx_local + 2*nGHST;
-  ny_g = Particles.G.ny_local + 2*nGHST;
-  nz_g = Particles.G.nz_local + 2*nGHST;
-
-  //Copy X boundaries
-  if (direction == 0){
-    for ( k=0; k<nz_g; k++ ){
-      for ( j=0; j<ny_g; j++ ){
-        for ( i=0; i<nGHST; i++ ){
-          if ( side == 0 ){
-            indx_src = (i) + (j)*nx_g + (k)*nx_g*ny_g;
-            indx_dst = (nx_g - 2*nGHST + i) + (j)*nx_g + (k)*nx_g*ny_g;
-          }
-          if ( side == 1 ){
-            indx_src = (nx_g - nGHST + i) + (j)*nx_g + (k)*nx_g*ny_g;
-            indx_dst = (i+nGHST) + (j)*nx_g + (k)*nx_g*ny_g;
-          }
-          Particles.G.density[indx_dst] += Particles.G.density[indx_src] ;
+  nx_g  = Particles.G.nx_local + 2 * nGHST;
+  ny_g  = Particles.G.ny_local + 2 * nGHST;
+  nz_g  = Particles.G.nz_local + 2 * nGHST;
+
+  // Copy X boundaries
+  if (direction == 0) {
+    for (k = 0; k < nz_g; k++) {
+      for (j = 0; j < ny_g; j++) {
+        for (i = 0; i < nGHST; i++) {
+          if (side == 0) {
+            indx_src = (i) + (j)*nx_g + (k)*nx_g * ny_g;
+            indx_dst = (nx_g - 2 * nGHST + i) + (j)*nx_g + (k)*nx_g * ny_g;
+          }
+          if (side == 1) {
+            indx_src = (nx_g - nGHST + i) + (j)*nx_g + (k)*nx_g * ny_g;
+            indx_dst = (i + nGHST) + (j)*nx_g + (k)*nx_g * ny_g;
+          }
+          Particles.G.density[indx_dst] += Particles.G.density[indx_src];
         }
       }
     }
   }
 
-  //Copy Y boundaries
-  if (direction == 1){
-    for ( k=0; k<nz_g; k++ ){
-      for ( j=0; j<nGHST; j++ ){
-        for ( i=0; i<nx_g; i++ ){
-          if ( side == 0 ){
-            indx_src = (i) + (j)*nx_g + (k)*nx_g*ny_g;
-            indx_dst = (i) + (ny_g - 2*nGHST + j)*nx_g + (k)*nx_g*ny_g;
+  // Copy Y boundaries
+  if (direction == 1) {
+    for (k = 0; k < nz_g; k++) {
+      for (j = 0; j < nGHST; j++) {
+        for (i = 0; i < nx_g; i++) {
+          if (side == 0) {
+            indx_src = (i) + (j)*nx_g + (k)*nx_g * ny_g;
+            indx_dst = (i) + (ny_g - 2 * nGHST + j) * nx_g + (k)*nx_g * ny_g;
           }
-          if ( side == 1 ){
-            indx_src = (i) + (ny_g - nGHST + j)*nx_g + (k)*nx_g*ny_g;
-            indx_dst = (i) + (j+nGHST)*nx_g + (k)*nx_g*ny_g;
+          if (side == 1) {
+            indx_src = (i) + (ny_g - nGHST + j) * nx_g + (k)*nx_g * ny_g;
+            indx_dst = (i) + (j + nGHST) * nx_g + (k)*nx_g * ny_g;
           }
-          Particles.G.density[indx_dst] += Particles.G.density[indx_src] ;
+          Particles.G.density[indx_dst] += Particles.G.density[indx_src];
         }
       }
     }
   }
 
-  //Copy Z boundaries
-  if (direction == 2){
-    for ( k=0; k<nGHST; k++ ){
-      for ( j=0; j<ny_g; j++ ){
-        for ( i=0; i<nx_g; i++ ){
-          if ( side == 0 ){
-            indx_src = (i) + (j)*nx_g + (k)*nx_g*ny_g;
-            indx_dst = (i) + (j)*nx_g + (nz_g - 2*nGHST + k)*nx_g*ny_g;
+  // Copy Z boundaries
+  if (direction == 2) {
+    for (k = 0; k < nGHST; k++) {
+      for (j = 0; j < ny_g; j++) {
+        for (i = 0; i < nx_g; i++) {
+          if (side == 0) {
+            indx_src = (i) + (j)*nx_g + (k)*nx_g * ny_g;
+            indx_dst = (i) + (j)*nx_g + (nz_g - 2 * nGHST + k) * nx_g * ny_g;
           }
-          if ( side == 1 ){
-            indx_src = (i) + (j)*nx_g + (nz_g - nGHST + k)*nx_g*ny_g;
-            indx_dst = (i) + (j)*nx_g + (k+nGHST)*nx_g*ny_g;
+          if (side == 1) {
+            indx_src = (i) + (j)*nx_g + (nz_g - nGHST + k) * nx_g * ny_g;
+            indx_dst = (i) + (j)*nx_g + (k + nGHST) * nx_g * ny_g;
           }
-          Particles.G.density[indx_dst] += Particles.G.density[indx_src] ;
+          Particles.G.density[indx_dst] += Particles.G.density[indx_src];
         }
       }
     }
   }
-
 }
 
-void Grid3D::Transfer_Particles_Density_Boundaries( struct parameters P ){
-
-  //Transfer the Particles Density Boundares
+void Grid3D::Transfer_Particles_Density_Boundaries(struct Parameters P)
+{
+  // Transfer the Particles Density Boundares
 
   Particles.TRANSFER_DENSITY_BOUNDARIES = true;
   Set_Boundary_Conditions(P);
   Particles.TRANSFER_DENSITY_BOUNDARIES = false;
-
 }
 
+  #ifdef MPI_CHOLLA
 
-#ifdef MPI_CHOLLA
-
-
-void Grid3D::Copy_Particles_Density_Buffer_Device_to_Host( int direction, int side, Real *buffer_d, Real *buffer_h ){
-
+void Grid3D::Copy_Particles_Density_Buffer_Device_to_Host(int direction, int side, Real *buffer_d, Real *buffer_h)
+{
   int nGHST, nx_g, ny_g, nz_g, buffer_length;
   nGHST = Particles.G.n_ghost_particles_grid;
-  nx_g = Particles.G.nx_local + 2*nGHST;
-  ny_g = Particles.G.ny_local + 2*nGHST;
-  nz_g = Particles.G.nz_local + 2*nGHST;
-  
-  if ( direction == 0 ) buffer_length = nGHST * ny_g * nz_g;
-  if ( direction == 1 ) buffer_length = nGHST * nx_g * nz_g;
-  if ( direction == 2 ) buffer_length = nGHST * nx_g * ny_g;
-  
-  cudaMemcpy( buffer_h, buffer_d, buffer_length*sizeof(Real), cudaMemcpyDeviceToHost);
-  
-}
+  nx_g  = Particles.G.nx_local + 2 * nGHST;
+  ny_g  = Particles.G.ny_local + 2 * nGHST;
+  nz_g  = Particles.G.nz_local + 2 * nGHST;
 
+  if (direction == 0) {
+    buffer_length = nGHST * ny_g * nz_g;
+  }
+  if (direction == 1) {
+    buffer_length = nGHST * nx_g * nz_g;
+  }
+  if (direction == 2) {
+    buffer_length = nGHST * nx_g * ny_g;
+  }
 
-//Load the particles density boundaries to the MPI buffers for transfer, return the size of the transfer buffer
-int Grid3D::Load_Particles_Density_Boundary_to_Buffer( int direction, int side, Real *buffer  ){
+  cudaMemcpy(buffer_h, buffer_d, buffer_length * sizeof(Real), cudaMemcpyDeviceToHost);
+}
 
+// Load the particles density boundaries to the MPI buffers for transfer, return
+// the size of the transfer buffer
+int Grid3D::Load_Particles_Density_Boundary_to_Buffer(int direction, int side, Real *buffer)
+{
   int i, j, k, indx, indx_buff, buffer_length;
   int nGHST, nx_g, ny_g, nz_g;
   nGHST = Particles.G.n_ghost_particles_grid;
-  nx_g = Particles.G.nx_local + 2*nGHST;
-  ny_g = Particles.G.ny_local + 2*nGHST;
-  nz_g = Particles.G.nz_local + 2*nGHST;
-
-  //Load Z boundaries
-  if (direction == 2){
-    for ( k=0; k<nGHST; k++ ){
-      for ( j=0; j<ny_g; j++ ){
-        for ( i=0; i<nx_g; i++ ){
-          if ( side == 0 ) indx = (i) + (j)*nx_g + (k)*nx_g*ny_g;
-          if ( side == 1 ) indx = (i) + (j)*nx_g + (nz_g - nGHST + k)*nx_g*ny_g;
-          indx_buff = i + j*nx_g + k*nx_g*ny_g ;
+  nx_g  = Particles.G.nx_local + 2 * nGHST;
+  ny_g  = Particles.G.ny_local + 2 * nGHST;
+  nz_g  = Particles.G.nz_local + 2 * nGHST;
+
+  // Load Z boundaries
+  if (direction == 2) {
+    for (k = 0; k < nGHST; k++) {
+      for (j = 0; j < ny_g; j++) {
+        for (i = 0; i < nx_g; i++) {
+          if (side == 0) {
+            indx = (i) + (j)*nx_g + (k)*nx_g * ny_g;
+          }
+          if (side == 1) {
+            indx = (i) + (j)*nx_g + (nz_g - nGHST + k) * nx_g * ny_g;
+          }
+          indx_buff         = i + j * nx_g + k * nx_g * ny_g;
           buffer[indx_buff] = Particles.G.density[indx];
         }
       }
@@ -130,14 +136,18 @@ int Grid3D::Load_Particles_Density_Boundary_to_Buffer( int direction, int side,
     buffer_length = nGHST * nx_g * ny_g;
   }
 
-  //Load Y boundaries
-  if (direction == 1){
-    for ( k=0; k<nz_g; k++ ){
-      for ( j=0; j<nGHST; j++ ){
-        for ( i=0; i<nx_g; i++ ){
-          if ( side == 0 ) indx = (i) + (j)*nx_g + (k)*nx_g*ny_g;
-          if ( side == 1 ) indx = (i) + (ny_g - nGHST + j)*nx_g + (k)*nx_g*ny_g;
-          indx_buff = i + k*nx_g + j*nx_g*nz_g ;
+  // Load Y boundaries
+  if (direction == 1) {
+    for (k = 0; k < nz_g; k++) {
+      for (j = 0; j < nGHST; j++) {
+        for (i = 0; i < nx_g; i++) {
+          if (side == 0) {
+            indx = (i) + (j)*nx_g + (k)*nx_g * ny_g;
+          }
+          if (side == 1) {
+            indx = (i) + (ny_g - nGHST + j) * nx_g + (k)*nx_g * ny_g;
+          }
+          indx_buff         = i + k * nx_g + j * nx_g * nz_g;
           buffer[indx_buff] = Particles.G.density[indx];
         }
       }
@@ -145,14 +155,18 @@ int Grid3D::Load_Particles_Density_Boundary_to_Buffer( int direction, int side,
     buffer_length = nGHST * nx_g * nz_g;
   }
 
-  //Load X boundaries
-  if (direction == 0){
-    for ( k=0; k<nz_g; k++ ){
-      for ( j=0; j<ny_g; j++ ){
-        for ( i=0; i<nGHST; i++ ){
-          if ( side == 0 ) indx = (i) + (j)*nx_g + (k)*nx_g*ny_g;
-          if ( side == 1 ) indx = (nx_g - nGHST + i) + (j)*nx_g + (k)*nx_g*ny_g;
-          indx_buff = j + k*ny_g + i*ny_g*nz_g ;
+  // Load X boundaries
+  if (direction == 0) {
+    for (k = 0; k < nz_g; k++) {
+      for (j = 0; j < ny_g; j++) {
+        for (i = 0; i < nGHST; i++) {
+          if (side == 0) {
+            indx = (i) + (j)*nx_g + (k)*nx_g * ny_g;
+          }
+          if (side == 1) {
+            indx = (nx_g - nGHST + i) + (j)*nx_g + (k)*nx_g * ny_g;
+          }
+          indx_buff         = j + k * ny_g + i * ny_g * nz_g;
           buffer[indx_buff] = Particles.G.density[indx];
         }
       }
@@ -160,56 +174,67 @@ int Grid3D::Load_Particles_Density_Boundary_to_Buffer( int direction, int side,
     buffer_length = nGHST * ny_g * nz_g;
   }
 
-
   return buffer_length;
 }
 
-//Unload the particles density boundaries from the MPI buffers after transfer
-void Grid3D::Unload_Particles_Density_Boundary_From_Buffer( int direction, int side, Real *buffer  ){
-
+// Unload the particles density boundaries from the MPI buffers after transfer
+void Grid3D::Unload_Particles_Density_Boundary_From_Buffer(int direction, int side, Real *buffer)
+{
   int i, j, k, indx, indx_buff, buffer_length;
   int nGHST, nx_g, ny_g, nz_g;
   nGHST = Particles.G.n_ghost_particles_grid;
-  nx_g = Particles.G.nx_local + 2*nGHST;
-  ny_g = Particles.G.ny_local + 2*nGHST;
-  nz_g = Particles.G.nz_local + 2*nGHST;
+  nx_g  = Particles.G.nx_local + 2 * nGHST;
+  ny_g  = Particles.G.ny_local + 2 * nGHST;
+  nz_g  = Particles.G.nz_local + 2 * nGHST;
 
   // //Unload Z boundaries
-  if (direction == 2){
-    for ( k=0; k<nGHST; k++ ){
-      for ( j=0; j<ny_g; j++ ){
-        for ( i=0; i<nx_g; i++ ){
-          if ( side == 0 ) indx = (i) + (j)*nx_g + (k + nGHST )*nx_g*ny_g;
-          if ( side == 1 ) indx = (i) + (j)*nx_g + (nz_g - 2*nGHST + k)*nx_g*ny_g;
-          indx_buff = i + j*nx_g + k*nx_g*ny_g ;
+  if (direction == 2) {
+    for (k = 0; k < nGHST; k++) {
+      for (j = 0; j < ny_g; j++) {
+        for (i = 0; i < nx_g; i++) {
+          if (side == 0) {
+            indx = (i) + (j)*nx_g + (k + nGHST) * nx_g * ny_g;
+          }
+          if (side == 1) {
+            indx = (i) + (j)*nx_g + (nz_g - 2 * nGHST + k) * nx_g * ny_g;
+          }
+          indx_buff = i + j * nx_g + k * nx_g * ny_g;
           Particles.G.density[indx] += buffer[indx_buff];
         }
       }
     }
   }
 
-  //Unload Y boundaries
-  if (direction == 1){
-    for ( k=0; k<nz_g; k++ ){
-      for ( j=0; j<nGHST; j++ ){
-        for ( i=0; i<nx_g; i++ ){
-          if ( side == 0 ) indx = (i) + (j + nGHST)*nx_g + (k)*nx_g*ny_g;
-          if ( side == 1 ) indx = (i) + (ny_g - 2*nGHST + j)*nx_g + (k)*nx_g*ny_g;
-          indx_buff = i + k*nx_g + j*nx_g*nz_g ;
+  // Unload Y boundaries
+  if (direction == 1) {
+    for (k = 0; k < nz_g; k++) {
+      for (j = 0; j < nGHST; j++) {
+        for (i = 0; i < nx_g; i++) {
+          if (side == 0) {
+            indx = (i) + (j + nGHST) * nx_g + (k)*nx_g * ny_g;
+          }
+          if (side == 1) {
+            indx = (i) + (ny_g - 2 * nGHST + j) * nx_g + (k)*nx_g * ny_g;
+          }
+          indx_buff = i + k * nx_g + j * nx_g * nz_g;
           Particles.G.density[indx] += buffer[indx_buff];
         }
       }
     }
   }
 
-  //Unload X boundaries
-  if (direction == 0){
-    for ( k=0; k<nz_g; k++ ){
-      for ( j=0; j<ny_g; j++ ){
-        for ( i=0; i<nGHST; i++ ){
-          if ( side == 0 ) indx = (i+nGHST) + (j)*nx_g + (k)*nx_g*ny_g;
-          if ( side == 1 ) indx = (nx_g - 2*nGHST + i) + (j)*nx_g + (k)*nx_g*ny_g;
-          indx_buff = j + k*ny_g + i*ny_g*nz_g ;
+  // Unload X boundaries
+  if (direction == 0) {
+    for (k = 0; k < nz_g; k++) {
+      for (j = 0; j < ny_g; j++) {
+        for (i = 0; i < nGHST; i++) {
+          if (side == 0) {
+            indx = (i + nGHST) + (j)*nx_g + (k)*nx_g * ny_g;
+          }
+          if (side == 1) {
+            indx = (nx_g - 2 * nGHST + i) + (j)*nx_g + (k)*nx_g * ny_g;
+          }
+          indx_buff = j + k * ny_g + i * ny_g * nz_g;
           Particles.G.density[indx] += buffer[indx_buff];
         }
       }
@@ -217,7 +242,6 @@ void Grid3D::Unload_Particles_Density_Boundary_From_Buffer( int direction, int s
   }
 }
 
-#endif
-
+  #endif
 
 #endif
diff --git a/src/particles/density_boundaries_gpu.cu b/src/particles/density_boundaries_gpu.cu
index 56882d12e..fd5c4ddca 100644
--- a/src/particles/density_boundaries_gpu.cu
+++ b/src/particles/density_boundaries_gpu.cu
@@ -1,64 +1,88 @@
 #if defined(PARTICLES_GPU) && defined(GRAVITY_GPU)
 
-#include "../io/io.h"
-#include "../grid/grid3D.h"
-#include "../particles/particles_3D.h"
-#include <iostream>
+  #include <iostream>
 
+  #include "../grid/grid3D.h"
+  #include "../io/io.h"
+  #include "particles_3D.h"
 
-
-__global__ void Set_Particles_Density_Boundaries_Periodic_kernel( int direction, int side, int n_i, int n_j, int nx, int ny, int nz, int n_ghost, Real *density_d   ){
-
+__global__ void Set_Particles_Density_Boundaries_Periodic_kernel(int direction, int side, int n_i, int n_j, int nx,
+                                                                 int ny, int nz, int n_ghost, Real *density_d)
+{
   // get a global thread ID
   int tid, tid_i, tid_j, tid_k, tid_src, tid_dst;
-  tid = threadIdx.x + blockIdx.x * blockDim.x;
-  tid_k = tid / (n_i*n_j);
-  tid_j = (tid - tid_k*n_i*n_j) / n_i;
-  tid_i = tid - tid_k*n_i*n_j - tid_j*n_i;
-  
-  if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost ) return;
-
-  if ( direction == 0 ){
-    if ( side == 0 ) tid_src = ( nx - n_ghost + tid_k )   + (tid_i)*nx + (tid_j)*nx*ny;
-    if ( side == 0 ) tid_dst = ( n_ghost + tid_k )        + (tid_i)*nx + (tid_j)*nx*ny;
-    if ( side == 1 ) tid_src = ( tid_k )                  + (tid_i)*nx + (tid_j)*nx*ny;
-    if ( side == 1 ) tid_dst = ( nx - 2*n_ghost + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny;
-  }
-  if ( direction == 1 ){
-    if ( side == 0 ) tid_src = (tid_i) + ( ny - n_ghost + tid_k )*nx    + (tid_j)*nx*ny;
-    if ( side == 0 ) tid_dst = (tid_i) + ( n_ghost + tid_k )*nx         + (tid_j)*nx*ny;
-    if ( side == 1 ) tid_src = (tid_i) + ( tid_k )*nx                   + (tid_j)*nx*ny;
-    if ( side == 1 ) tid_dst = (tid_i) + ( ny - 2*n_ghost + tid_k )*nx  + (tid_j)*nx*ny;
-  }
-  if ( direction == 2 ){
-    if ( side == 0 ) tid_src = (tid_i) + (tid_j)*nx + ( nz - n_ghost + tid_k  )*nx*ny;
-    if ( side == 0 ) tid_dst = (tid_i) + (tid_j)*nx + ( n_ghost + tid_k )*nx*ny;
-    if ( side == 1 ) tid_src = (tid_i) + (tid_j)*nx + ( tid_k )*nx*ny;
-    if ( side == 1 ) tid_dst = (tid_i) + (tid_j)*nx + ( nz - 2* n_ghost + tid_k  )*nx*ny;
-  }
-  
+  tid   = threadIdx.x + blockIdx.x * blockDim.x;
+  tid_k = tid / (n_i * n_j);
+  tid_j = (tid - tid_k * n_i * n_j) / n_i;
+  tid_i = tid - tid_k * n_i * n_j - tid_j * n_i;
+
+  if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) {
+    return;
+  }
+
+  if (direction == 0) {
+    if (side == 0) {
+      tid_src = (nx - n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny;
+    }
+    if (side == 0) {
+      tid_dst = (n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny;
+    }
+    if (side == 1) {
+      tid_src = (tid_k) + (tid_i)*nx + (tid_j)*nx * ny;
+    }
+    if (side == 1) {
+      tid_dst = (nx - 2 * n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny;
+    }
+  }
+  if (direction == 1) {
+    if (side == 0) {
+      tid_src = (tid_i) + (ny - n_ghost + tid_k) * nx + (tid_j)*nx * ny;
+    }
+    if (side == 0) {
+      tid_dst = (tid_i) + (n_ghost + tid_k) * nx + (tid_j)*nx * ny;
+    }
+    if (side == 1) {
+      tid_src = (tid_i) + (tid_k)*nx + (tid_j)*nx * ny;
+    }
+    if (side == 1) {
+      tid_dst = (tid_i) + (ny - 2 * n_ghost + tid_k) * nx + (tid_j)*nx * ny;
+    }
+  }
+  if (direction == 2) {
+    if (side == 0) {
+      tid_src = (tid_i) + (tid_j)*nx + (nz - n_ghost + tid_k) * nx * ny;
+    }
+    if (side == 0) {
+      tid_dst = (tid_i) + (tid_j)*nx + (n_ghost + tid_k) * nx * ny;
+    }
+    if (side == 1) {
+      tid_src = (tid_i) + (tid_j)*nx + (tid_k)*nx * ny;
+    }
+    if (side == 1) {
+      tid_dst = (tid_i) + (tid_j)*nx + (nz - 2 * n_ghost + tid_k) * nx * ny;
+    }
+  }
+
   density_d[tid_dst] += density_d[tid_src];
-  
 }
 
-
-void Grid3D::Set_Particles_Density_Boundaries_Periodic_GPU( int direction, int side ){
-  
+void Grid3D::Set_Particles_Density_Boundaries_Periodic_GPU(int direction, int side)
+{
   int n_ghost, nx_g, ny_g, nz_g, size, ngrid, n_i, n_j;
   n_ghost = Particles.G.n_ghost_particles_grid;
-  nx_g = Particles.G.nx_local + 2*n_ghost;
-  ny_g = Particles.G.ny_local + 2*n_ghost;
-  nz_g = Particles.G.nz_local + 2*n_ghost;
+  nx_g    = Particles.G.nx_local + 2 * n_ghost;
+  ny_g    = Particles.G.ny_local + 2 * n_ghost;
+  nz_g    = Particles.G.nz_local + 2 * n_ghost;
 
-  if ( direction == 0 ){
+  if (direction == 0) {
     n_i = ny_g;
     n_j = nz_g;
   }
-  if ( direction == 1 ){
+  if (direction == 1) {
     n_i = nx_g;
     n_j = nz_g;
   }
-  if ( direction == 2 ){
+  if (direction == 2) {
     n_i = nx_g;
     n_j = ny_g;
   }
@@ -66,74 +90,79 @@ void Grid3D::Set_Particles_Density_Boundaries_Periodic_GPU( int direction, int s
   size = n_ghost * n_i * n_j;
 
   // set values for GPU kernels
-  ngrid = ( size - 1 ) / TPB_PARTICLES + 1;
+  ngrid = (size - 1) / TPB_PARTICLES + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(ngrid, 1, 1);
   //  number of threads per 1D block
   dim3 dim1dBlock(TPB_PARTICLES, 1, 1);
 
-  hipLaunchKernelGGL( Set_Particles_Density_Boundaries_Periodic_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, n_i, n_j, nx_g, ny_g, nz_g, n_ghost, Particles.G.density_dev );
-  
+  hipLaunchKernelGGL(Set_Particles_Density_Boundaries_Periodic_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side,
+                     n_i, n_j, nx_g, ny_g, nz_g, n_ghost, Particles.G.density_dev);
 }
 
+  #ifdef MPI_CHOLLA
 
-
-
-
-#ifdef MPI_CHOLLA
-
-
-
-__global__ void Load_Particles_Density_Boundary_to_Buffer_kernel( int direction, int side, int n_i, int n_j, int nx, int ny, int nz, int n_ghost, Real *density_d, Real *transfer_buffer_d   ){
-
+__global__ void Load_Particles_Density_Boundary_to_Buffer_kernel(int direction, int side, int n_i, int n_j, int nx,
+                                                                 int ny, int nz, int n_ghost, Real *density_d,
+                                                                 Real *transfer_buffer_d)
+{
   // get a global thread ID
   int tid, tid_i, tid_j, tid_k, tid_buffer, tid_dens;
-  tid = threadIdx.x + blockIdx.x * blockDim.x;
-  tid_k = tid / (n_i*n_j);
-  tid_j = (tid - tid_k*n_i*n_j) / n_i;
-  tid_i = tid - tid_k*n_i*n_j - tid_j*n_i;
+  tid   = threadIdx.x + blockIdx.x * blockDim.x;
+  tid_k = tid / (n_i * n_j);
+  tid_j = (tid - tid_k * n_i * n_j) / n_i;
+  tid_i = tid - tid_k * n_i * n_j - tid_j * n_i;
 
-  if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost ) return;
+  if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) {
+    return;
+  }
 
-  tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j;
+  tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j;
 
-  if ( direction == 0 ){
-    if ( side == 0 ) tid_dens = ( tid_k )                 + (tid_i)*nx + (tid_j)*nx*ny;
-    if ( side == 1 ) tid_dens = ( nx - n_ghost + tid_k )  + (tid_i)*nx + (tid_j)*nx*ny;
+  if (direction == 0) {
+    if (side == 0) {
+      tid_dens = (tid_k) + (tid_i)*nx + (tid_j)*nx * ny;
+    }
+    if (side == 1) {
+      tid_dens = (nx - n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny;
+    }
   }
-  if ( direction == 1 ){
-    if ( side == 0 ) tid_dens = (tid_i) + ( tid_k )*nx                 + (tid_j)*nx*ny;
-    if ( side == 1 ) tid_dens = (tid_i) + ( ny - n_ghost + tid_k )*nx  + (tid_j)*nx*ny;
+  if (direction == 1) {
+    if (side == 0) {
+      tid_dens = (tid_i) + (tid_k)*nx + (tid_j)*nx * ny;
+    }
+    if (side == 1) {
+      tid_dens = (tid_i) + (ny - n_ghost + tid_k) * nx + (tid_j)*nx * ny;
+    }
   }
-  if ( direction == 2 ){
-    if ( side == 0 ) tid_dens = (tid_i) + (tid_j)*nx + ( tid_k )*nx*ny;
-    if ( side == 1 ) tid_dens = (tid_i) + (tid_j)*nx + ( nz - n_ghost + tid_k  )*nx*ny;
+  if (direction == 2) {
+    if (side == 0) {
+      tid_dens = (tid_i) + (tid_j)*nx + (tid_k)*nx * ny;
+    }
+    if (side == 1) {
+      tid_dens = (tid_i) + (tid_j)*nx + (nz - n_ghost + tid_k) * nx * ny;
+    }
   }
   transfer_buffer_d[tid_buffer] = density_d[tid_dens];
-
 }
 
-
-
-
-
-int Grid3D::Load_Particles_Density_Boundary_to_Buffer_GPU( int direction, int side, Real *buffer  ){
-
+int Grid3D::Load_Particles_Density_Boundary_to_Buffer_GPU(int direction, int side, Real *buffer)
+{
   int n_ghost, nx_g, ny_g, nz_g, size_buffer, ngrid, n_i, n_j;
   n_ghost = Particles.G.n_ghost_particles_grid;
-  nx_g = Particles.G.nx_local + 2*n_ghost;
-  ny_g = Particles.G.ny_local + 2*n_ghost;
-  nz_g = Particles.G.nz_local + 2*n_ghost;
+  nx_g    = Particles.G.nx_local + 2 * n_ghost;
+  ny_g    = Particles.G.ny_local + 2 * n_ghost;
+  nz_g    = Particles.G.nz_local + 2 * n_ghost;
 
-  if ( direction == 0 ){
+  if (direction == 0) {
     n_i = ny_g;
     n_j = nz_g;
   }
-  if ( direction == 1 ){
+  if (direction == 1) {
     n_i = nx_g;
     n_j = nz_g;
   }
-  if ( direction == 2 ){
+  if (direction == 2) {
     n_i = nx_g;
     n_j = ny_g;
   }
@@ -141,7 +170,7 @@ int Grid3D::Load_Particles_Density_Boundary_to_Buffer_GPU( int direction, int si
   size_buffer = n_ghost * n_i * n_j;
 
   // set values for GPU kernels
-  ngrid = ( size_buffer - 1 ) / TPB_PARTICLES + 1;
+  ngrid = (size_buffer - 1) / TPB_PARTICLES + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(ngrid, 1, 1);
   //  number of threads per 1D block
@@ -153,65 +182,75 @@ int Grid3D::Load_Particles_Density_Boundary_to_Buffer_GPU( int direction, int si
   Real *send_buffer_d;
   send_buffer_d = buffer;
 
-  hipLaunchKernelGGL( Load_Particles_Density_Boundary_to_Buffer_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, n_i, n_j, nx_g, ny_g, nz_g, n_ghost, density_d, send_buffer_d  );
+  hipLaunchKernelGGL(Load_Particles_Density_Boundary_to_Buffer_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side,
+                     n_i, n_j, nx_g, ny_g, nz_g, n_ghost, density_d, send_buffer_d);
 
   cudaDeviceSynchronize();
 
   return size_buffer;
 }
 
-
-
-
-__global__ void Unload_Particles_Density_Boundary_to_Buffer_kernel( int direction, int side, int n_i, int n_j, int nx, int ny, int nz, int n_ghost, Real *density_d, Real *transfer_buffer_d   ){
-
+__global__ void Unload_Particles_Density_Boundary_to_Buffer_kernel(int direction, int side, int n_i, int n_j, int nx,
+                                                                   int ny, int nz, int n_ghost, Real *density_d,
+                                                                   Real *transfer_buffer_d)
+{
   // get a global thread ID
   int tid, tid_i, tid_j, tid_k, tid_buffer, tid_dens;
-  tid = threadIdx.x + blockIdx.x * blockDim.x;
-  tid_k = tid / (n_i*n_j);
-  tid_j = (tid - tid_k*n_i*n_j) / n_i;
-  tid_i = tid - tid_k*n_i*n_j - tid_j*n_i;
+  tid   = threadIdx.x + blockIdx.x * blockDim.x;
+  tid_k = tid / (n_i * n_j);
+  tid_j = (tid - tid_k * n_i * n_j) / n_i;
+  tid_i = tid - tid_k * n_i * n_j - tid_j * n_i;
 
-  if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost ) return;
+  if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) {
+    return;
+  }
 
-  tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j;
+  tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j;
 
-  if ( direction == 0 ){
-    if ( side == 0 ) tid_dens = ( n_ghost + tid_k )        + (tid_i)*nx + (tid_j)*nx*ny;
-    if ( side == 1 ) tid_dens = ( nx - 2*n_ghost + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny;
+  if (direction == 0) {
+    if (side == 0) {
+      tid_dens = (n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny;
+    }
+    if (side == 1) {
+      tid_dens = (nx - 2 * n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny;
+    }
   }
-  if ( direction == 1 ){
-    if ( side == 0 ) tid_dens = (tid_i) + ( n_ghost + tid_k )*nx         + (tid_j)*nx*ny;
-    if ( side == 1 ) tid_dens = (tid_i) + ( ny - 2*n_ghost + tid_k )*nx  + (tid_j)*nx*ny;
+  if (direction == 1) {
+    if (side == 0) {
+      tid_dens = (tid_i) + (n_ghost + tid_k) * nx + (tid_j)*nx * ny;
+    }
+    if (side == 1) {
+      tid_dens = (tid_i) + (ny - 2 * n_ghost + tid_k) * nx + (tid_j)*nx * ny;
+    }
   }
-  if ( direction == 2 ){
-    if ( side == 0 ) tid_dens = (tid_i) + (tid_j)*nx + ( n_ghost + tid_k )*nx*ny;
-    if ( side == 1 ) tid_dens = (tid_i) + (tid_j)*nx + ( nz - 2* n_ghost + tid_k  )*nx*ny;
+  if (direction == 2) {
+    if (side == 0) {
+      tid_dens = (tid_i) + (tid_j)*nx + (n_ghost + tid_k) * nx * ny;
+    }
+    if (side == 1) {
+      tid_dens = (tid_i) + (tid_j)*nx + (nz - 2 * n_ghost + tid_k) * nx * ny;
+    }
   }
   density_d[tid_dens] += transfer_buffer_d[tid_buffer];
-
 }
 
-
-
-
-void Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU( int direction, int side, Real *buffer  ){
-
+void Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU(int direction, int side, Real *buffer)
+{
   int n_ghost, nx_g, ny_g, nz_g, size_buffer, ngrid, n_i, n_j;
   n_ghost = Particles.G.n_ghost_particles_grid;
-  nx_g = Particles.G.nx_local + 2*n_ghost;
-  ny_g = Particles.G.ny_local + 2*n_ghost;
-  nz_g = Particles.G.nz_local + 2*n_ghost;
+  nx_g    = Particles.G.nx_local + 2 * n_ghost;
+  ny_g    = Particles.G.ny_local + 2 * n_ghost;
+  nz_g    = Particles.G.nz_local + 2 * n_ghost;
 
-  if ( direction == 0 ){
+  if (direction == 0) {
     n_i = ny_g;
     n_j = nz_g;
   }
-  if ( direction == 1 ){
+  if (direction == 1) {
     n_i = nx_g;
     n_j = nz_g;
   }
-  if ( direction == 2 ){
+  if (direction == 2) {
     n_i = nx_g;
     n_j = ny_g;
   }
@@ -219,7 +258,7 @@ void Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU( int direction, i
   size_buffer = n_ghost * n_i * n_j;
 
   // set values for GPU kernels
-  ngrid = ( size_buffer - 1 ) / TPB_PARTICLES + 1;
+  ngrid = (size_buffer - 1) / TPB_PARTICLES + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(ngrid, 1, 1);
   //  number of threads per 1D block
@@ -231,12 +270,10 @@ void Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU( int direction, i
   Real *recv_buffer_d;
   recv_buffer_d = buffer;
 
-  hipLaunchKernelGGL( Unload_Particles_Density_Boundary_to_Buffer_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, n_i, n_j, nx_g, ny_g, nz_g, n_ghost, density_d, recv_buffer_d  );
-
+  hipLaunchKernelGGL(Unload_Particles_Density_Boundary_to_Buffer_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side,
+                     n_i, n_j, nx_g, ny_g, nz_g, n_ghost, density_d, recv_buffer_d);
 }
 
+  #endif  // MPI_CHOLLA
 
-
-#endif//MPI_CHOLLA
-
-#endif//PARTICLES_GPU
+#endif  // PARTICLES_GPU & GRAVITY_GPU
diff --git a/src/particles/feedback_CIC.cpp b/src/particles/feedback_CIC.cpp
deleted file mode 100644
index 64c2940f7..000000000
--- a/src/particles/feedback_CIC.cpp
+++ /dev/null
@@ -1,173 +0,0 @@
-#ifdef PARTICLES
-#ifdef DE
-#ifdef PARTICLE_AGE
-
-#include <iostream>
-#include "../particles/feedback_CIC.h"
-#include "../particles/particles_3D.h"
-#include "../grid/grid3D.h"
-#include "../particles/density_CIC.h"
-
-
-#ifdef PARALLEL_OMP
-#include "../utils/parallel_omp.h"
-#endif
-
-
-// simple energy feedback prescription
-Real getClusterEnergyFeedback(Real t, Real dt, Real age) {
-    if (t + age <= 1.0e4) return ENERGY_FEEDBACK_RATE * dt;
-    else return 0;
-}
-
-// simple feedback prescription
-Real getClusterMassFeedback(Real t, Real dt, Real age) {
-    //if (t + age <= 1.0e4) return 0.1 * dt; // 0.01 SN/ky/cluster * 10 solar mass ejected/SN
-    //if (t + age <= 1.0e4) return 10 * dt; // 1 SN/ky/cluster * 10 solar mass ejected/SN
-    //else return 0;
-    return 0;
-}
-
-
-void Grid3D::Cluster_Feedback(){
-  #ifdef PARTICLES_CPU
-  #ifndef PARALLEL_OMP
-  Cluster_Feedback_Function( 0, Particles.n_local );
-  #else
-  #pragma omp parallel num_threads( N_OMP_THREADS )
-  {
-    int omp_id, n_omp_procs;
-    part_int_t p_start, p_end;
-
-    omp_id = omp_get_thread_num();
-    n_omp_procs = omp_get_num_threads();
-
-    Get_OMP_Particles_Indxs( Particles.n_local, N_OMP_THREADS, omp_id,  &p_start, &p_end );
-
-    Cluster_Feedback_Function( p_start, p_end );
-  }
-  #endif //PARALLEL_OMP
-  #endif //PARTICLES_CPU
-}
-
-
-//Compute the CIC feedback
-void Grid3D::Cluster_Feedback_Function(part_int_t p_start, part_int_t p_end) {
-  int nx_g, ny_g, nz_g;
-  nx_g = H.nx;
-  ny_g = H.ny;
-  nz_g = H.nz;
-
-  Real xMin, yMin, zMin;
-  xMin = H.xblocal;  //TODO: make sure this is correct (and not H.xbound) (local min vs. global min)
-  yMin = H.yblocal;
-  zMin = H.zblocal;
-
-
-  part_int_t pIndx;
-  int indx_x, indx_y, indx_z, indx;
-  Real x_pos, y_pos, z_pos;
-  Real cell_center_x, cell_center_y, cell_center_z;
-  Real delta_x, delta_y, delta_z;
-  Real dV_inv = 1./(H.dx*H.dy*H.dz);
-  Real feedback_energy, feedback_density;
-
-  bool ignore, in_local;
-  for ( pIndx=p_start; pIndx < p_end; pIndx++ ){
-    ignore = false;
-    in_local = true;
-    // pMass = Particles.mass[pIndx] * dV_inv;
-    x_pos = Particles.pos_x[pIndx];
-    y_pos = Particles.pos_y[pIndx];
-    z_pos = Particles.pos_z[pIndx];
-    Get_Indexes_CIC( xMin, yMin, zMin, H.dx, H.dy, H.dz, x_pos, y_pos, z_pos, indx_x, indx_y, indx_z );
-    if ( indx_x < -1 ) ignore = true;
-    if ( indx_y < -1 ) ignore = true;
-    if ( indx_z < -1 ) ignore = true;
-    if ( indx_x > nx_g-3  ) ignore = true;
-    if ( indx_y > ny_g-3  ) ignore = true;
-    if ( indx_y > nz_g-3  ) ignore = true;
-    if ( x_pos < H.xblocal || x_pos >= H.xblocal_max ) in_local = false;
-    if ( y_pos < H.yblocal || y_pos >= H.yblocal_max ) in_local = false;
-    if ( z_pos < H.zblocal || z_pos >= H.zblocal_max ) in_local = false;
-    if ( ! in_local  ) {
-      std::cout << " Cluster_FeedbackError:" << std::endl;
-      #ifdef PARTICLE_IDS
-      std::cout << " Particle outside Local  domain    pID: " << Particles.partIDs[pIndx] << std::endl;
-      #else
-      std::cout << " Particle outside Local  domain " << std::endl;
-      #endif
-      std::cout << "  Domain X: " << xMin <<  "  " << H.xblocal_max << std::endl;
-      std::cout << "  Domain Y: " << yMin <<  "  " << H.xblocal_max << std::endl;
-      std::cout << "  Domain Z: " << zMin <<  "  " << H.xblocal_max << std::endl;
-      std::cout << "  Particle X: " << x_pos << std::endl;
-      std::cout << "  Particle Y: " << y_pos << std::endl;
-      std::cout << "  Particle Z: " << z_pos << std::endl;
-      continue;
-    }
-    if ( ignore ){
-      #ifdef PARTICLE_IDS
-      std::cout << "ERROR Cluster_Feedback Index    pID: " << Particles.partIDs[pIndx] << std::endl;
-      #else
-      std::cout << "ERROR Cluster_Feedback Index " << std::endl;
-      #endif
-      std::cout << "Negative xIndx: " << x_pos << "  " << indx_x << std::endl;
-      std::cout << "Negative zIndx: " << z_pos << "  " << indx_z << std::endl;
-      std::cout << "Negative yIndx: " << y_pos << "  " << indx_y << std::endl;
-      std::cout << "Excess xIndx: " << x_pos << "  " << indx_x << std::endl;
-      std::cout << "Excess yIndx: " << y_pos << "  " << indx_y << std::endl;
-      std::cout << "Excess zIndx: " << z_pos << "  " << indx_z << std::endl;
-      std::cout << std::endl;
-      continue;
-    }
-
-    cell_center_x = xMin + indx_x*H.dx + 0.5*H.dx;
-    cell_center_y = yMin + indx_y*H.dy + 0.5*H.dy;
-    cell_center_z = zMin + indx_z*H.dz + 0.5*H.dz;
-    delta_x = 1 - ( x_pos - cell_center_x ) / H.dx;
-    delta_y = 1 - ( y_pos - cell_center_y ) / H.dy;
-    delta_z = 1 - ( z_pos - cell_center_z ) / H.dz;
-    indx_x += H.n_ghost;
-    indx_y += H.n_ghost;
-    indx_z += H.n_ghost;
-
-    feedback_energy = getClusterEnergyFeedback(H.t, H.dt, Particles.age[pIndx]) * dV_inv;
-    feedback_density = getClusterMassFeedback(H.t, H.dt, Particles.age[pIndx]) * dV_inv;
-
-    indx = indx_x + indx_y*nx_g + indx_z*nx_g*ny_g;
-    C.density[indx] += feedback_density  * delta_x * delta_y * delta_z;
-    C.GasEnergy[indx] += feedback_energy  * delta_x * delta_y * delta_z;
-
-    indx = (indx_x+1) + indx_y*nx_g + indx_z*nx_g*ny_g;
-    C.density[indx] += feedback_density  * (1-delta_x) * delta_y * delta_z;
-    C.GasEnergy[indx] += feedback_energy  * (1-delta_x) * delta_y * delta_z;
-
-    indx = indx_x + (indx_y+1)*nx_g + indx_z*nx_g*ny_g;
-    C.density[indx] += feedback_density  * delta_x * (1-delta_y) * delta_z;
-    C.GasEnergy[indx] += feedback_energy  * delta_x * (1-delta_y) * delta_z;
-
-    indx = indx_x + indx_y*nx_g + (indx_z+1)*nx_g*ny_g;
-    C.density[indx] += feedback_density  * delta_x * delta_y * (1-delta_z);
-    C.GasEnergy[indx] += feedback_energy  * delta_x * delta_y * (1-delta_z);
-
-    indx = (indx_x+1) + (indx_y+1)*nx_g + indx_z*nx_g*ny_g;
-    C.density[indx] += feedback_density  * (1-delta_x) * (1-delta_y) * delta_z;
-    C.GasEnergy[indx] += feedback_energy  * (1-delta_x) * (1-delta_y) * delta_z;
-
-    indx = (indx_x+1) + indx_y*nx_g + (indx_z+1)*nx_g*ny_g;
-    C.density[indx] += feedback_density  * (1-delta_x) * delta_y * (1-delta_z);
-    C.GasEnergy[indx] += feedback_energy  * (1-delta_x) * delta_y * (1-delta_z);
-
-    indx = indx_x + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g;
-    C.density[indx] += feedback_density  * delta_x * (1-delta_y) * (1-delta_z);
-    C.GasEnergy[indx] += feedback_energy  * delta_x * (1-delta_y) * (1-delta_z);
-
-    indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g;
-    C.density[indx] += feedback_density * (1-delta_x) * (1-delta_y) * (1-delta_z);
-    C.GasEnergy[indx] += feedback_energy * (1-delta_x) * (1-delta_y) * (1-delta_z);
-  }
-}
-
-#endif //PARTICLE_AGE
-#endif //DE
-#endif //PARTICLES
diff --git a/src/particles/feedback_CIC.h b/src/particles/feedback_CIC.h
deleted file mode 100644
index 1775cb898..000000000
--- a/src/particles/feedback_CIC.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifdef PARTICLES
-#ifdef DE
-#ifdef PARTICLE_AGE
-
-
-#ifndef FEEDBACK_CIC_H
-#define FEEDBACK_CIC_H
-#include "../global/global.h"
-
-#define ENERGY_FEEDBACK_RATE 5.25958e-07  //Rate is 1e51 erg/100M_solar spread out over 10Myr
-
-Real getClusterEnergyFeedback(Real t, Real dt, Real age);
-Real getClusterMassFeedback(Real t, Real dt, Real age);
-
-#endif
-#endif
-#endif
-#endif
diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu
new file mode 100644
index 000000000..bd162e585
--- /dev/null
+++ b/src/particles/feedback_CIC_gpu.cu
@@ -0,0 +1,785 @@
+#if defined(SUPERNOVA) && defined(PARTICLES_GPU) && defined(PARTICLE_AGE) && defined(PARTICLE_IDS)
+
+  #include <math.h>
+  #include <stdio.h>
+  #include <stdlib.h>
+  #include <unistd.h>
+
+  #include <cstring>
+  #include <fstream>
+  #include <sstream>
+  #include <vector>
+
+  #include "../global/global.h"
+  #include "../global/global_cuda.h"
+  #include "../grid/grid3D.h"
+  #include "../io/io.h"
+  #include "supernova.h"
+
+  #define TPB_FEEDBACK   128
+  #define FEED_INFO_N    6
+  #define I_RES          1  // unused
+  #define I_UNRES        2  // unused
+  #define I_ENERGY       3  // unused
+  #define I_MOMENTUM     4  // unused
+  #define I_UNRES_ENERGY 5  // used
+
+typedef curandStateMRG32k3a_t FeedbackPrng;
+// typedef curandStatePhilox4_32_10_t FeedbackPrng;
+
+namespace supernova
+{
+FeedbackPrng* randStates;
+part_int_t n_states;
+Real *dev_snr, snr_dt, time_sn_start, time_sn_end;
+int snr_n;
+}  // namespace supernova
+
+  #ifndef O_HIP
+// NOLINTNEXTLINE(readability-identifier-naming)
+__device__ double atomicMax(double* address, double val)
+{
+  auto* address_as_ull       = (unsigned long long int*)address;
+  unsigned long long int old = *address_as_ull, assumed;
+  do {
+    assumed = old;
+    old     = atomicCAS(address_as_ull, assumed, __double_as_longlong(fmax(val, __longlong_as_double(assumed))));
+  } while (assumed != old);
+  return __longlong_as_double(old);
+}
+  #endif  // O_HIP
+
+__global__ void Init_State_Kernel(unsigned int seed, FeedbackPrng* states)
+{
+  int id = blockIdx.x * blockDim.x + threadIdx.x;
+  curand_init(seed, id, 0, &states[id]);
+}
+
+/**
+ * @brief Does 2 things:
+ * -# Read in SN rate data from Starburst 99. If no file exists, assume a
+ * constant rate.
+ * -# Initialize the cuRAND state, which is analogous to the concept of
+ * generators in CPU code. The state object maintains configuration and status
+ * the cuRAND context for each thread on the GPU. Initialize more than the
+ * number of local particles since the latter will change through MPI transfers.
+ *
+ * @param P pointer to parameters struct. Passes in starburst 99 filename and
+ * random number gen seed.
+ * @param n_local  number of local particles on the GPU
+ * @param allocation_factor
+ */
+void supernova::initState(struct Parameters* P, part_int_t n_local, Real allocation_factor)
+{
+  chprintf("supernova::initState start\n");
+  std::string snr_filename(P->snr_filename);
+  if (not snr_filename.empty()) {
+    chprintf("Specified a SNR filename %s.\n", snr_filename.data());
+
+    // read in array of supernova rate values.
+    std::ifstream snr_in(snr_filename);
+    if (!snr_in.is_open()) {
+      chprintf("ERROR: but couldn't read SNR file.\n");
+      exit(-1);
+    }
+
+    std::vector<Real> snr_time;
+    std::vector<Real> snr;
+
+    const int N_HEADER    = 7;    // S'99 has 7 rows of header information
+    const char* s99_delim = " ";  // S'99 data separator
+    std::string line;
+    int line_counter = 0;
+
+    while (snr_in.good()) {
+      std::getline(snr_in, line);
+      if (line_counter++ < N_HEADER) {
+        continue;
+      }  // skip header processing
+
+      int i      = 0;
+      char* data = strtok(line.data(), s99_delim);
+      while (data != nullptr) {
+        if (i == 0) {
+          // in the following divide by # years per kyr (1000)
+          snr_time.push_back(std::stof(std::string(data)) / 1000);
+        } else if (i == 1) {
+          snr.push_back(pow(10, std::stof(std::string(data))) / 1000);
+        }
+        if (i > 0) {
+          break;  // only care about the first 2 items.  Once i = 1 can break
+        }         // here.
+
+        data = strtok(nullptr, s99_delim);
+        i++;
+      }
+    }
+
+    time_sn_end   = snr_time[snr_time.size() - 1];
+    time_sn_start = snr_time[0];
+    // the following is the time interval between data points
+    // (i.e. assumes regular temporal spacing)
+    snr_dt = (time_sn_end - time_sn_start) / (snr.size() - 1);
+
+    GPU_Error_Check(cudaMalloc((void**)&dev_snr, snr.size() * sizeof(Real)));
+    GPU_Error_Check(cudaMemcpy(dev_snr, snr.data(), snr.size() * sizeof(Real), cudaMemcpyHostToDevice));
+
+  } else {
+    chprintf("No SN rate file specified.  Using constant rate\n");
+    time_sn_start = DEFAULT_SN_START;
+    time_sn_end   = DEFAULT_SN_END;
+  }
+
+  // Now initialize the poisson random number generator state.
+  n_states = n_local * allocation_factor;
+  GPU_Error_Check(cudaMalloc((void**)&randStates, n_states * sizeof(FeedbackPrng)));
+
+  int ngrid = (n_states - 1) / TPB_FEEDBACK + 1;
+  dim3 grid(ngrid);
+  dim3 block(TPB_FEEDBACK);
+
+  hipLaunchKernelGGL(Init_State_Kernel, grid, block, 0, 0, P->prng_seed, randStates);
+  GPU_Error_Check(cudaDeviceSynchronize());
+  chprintf("supernova::initState end: n_states=%ld, ngrid=%d, threads=%d\n", n_states, ngrid, TPB_FEEDBACK);
+}
+
+__device__ Real GetSNRate(Real t, Real* dev_snr, Real snr_dt, Real t_start, Real t_end)
+{
+  if (t < t_start || t >= t_end) {
+    return 0;
+  }
+  if (dev_snr == nullptr) {
+    return supernova::DEFAULT_SNR;
+  }
+
+  int index = (int)((t - t_start) / snr_dt);
+  return dev_snr[index] + (t - index * snr_dt) * (dev_snr[index + 1] - dev_snr[index]) / snr_dt;
+}
+
+__device__ Real Calc_Timestep(Real gamma, Real* density, Real* momentum_x, Real* momentum_y, Real* momentum_z,
+                              Real* energy, int index, Real dx, Real dy, Real dz, Real density_floor)
+{
+  Real dens  = fmax(density[index], density_floor);
+  Real d_inv = 1.0 / dens;
+  Real vx    = momentum_x[index] * d_inv;
+  Real vy    = momentum_y[index] * d_inv;
+  Real vz    = momentum_z[index] * d_inv;
+  Real P     = fmax((energy[index] - 0.5 * dens * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0), TINY_NUMBER);
+  Real cs    = sqrt(gamma * P * d_inv);
+  return fmax(fmax((fabs(vx) + cs) / dx, (fabs(vy) + cs) / dy), (fabs(vz) + cs) / dz);
+}
+
+/** the prescription for dividing a scalar quantity between 3x3x3 cells is done
+   by imagining a 2x2x2 cell volume around the SN.  These fractions, then,
+   represent the linear extent of this volume into the cell in question. For i=0
+   this should be 1*1/2. For i=-1 this should be (1-dx)*1/2. For i=+1 this
+   should be dx*1/2. In the above the 1/2 factor is normalize over 2
+   cells/direction.
+  */
+__device__ Real Frac(int i, Real dx) { return (-0.5 * i * i - 0.5 * i + 1 + i * dx) * 0.5; }
+
+__device__ Real D_Fr(int i, Real dx)
+{
+  return (dx > 0.5) * i * (1 - 2 * dx) + ((i + 1) * dx + 0.5 * (i - 1)) - 3 * (i - 1) * (i + 1) * (0.5 - dx);
+}
+
+__device__ Real GetAverageDensity(Real* density, int xi, int yi, int zi, int nx_grid, int ny_grid, int n_ghost)
+{
+  Real d_average = 0.0;
+  for (int i = -1; i < 2; i++) {
+    for (int j = -1; j < 2; j++) {
+      for (int k = -1; k < 2; k++) {
+        d_average +=
+            density[(xi + n_ghost + i) + (yi + n_ghost + j) * nx_grid + (zi + n_ghost + k) * nx_grid * ny_grid];
+      }
+    }
+  }
+  return d_average / 27;
+}
+
+__device__ Real GetAverageNumberDensity_CGS(Real* density, int xi, int yi, int zi, int nx_grid, int ny_grid,
+                                            int n_ghost)
+{
+  return GetAverageDensity(density, xi, yi, zi, nx_grid, ny_grid, n_ghost) * DENSITY_UNIT / (supernova::MU * MP);
+}
+
+__device__ bool Particle_Is_Alone(Real* pos_x_dev, Real* pos_y_dev, Real* pos_z_dev, part_int_t n_local, int gtid,
+                                  Real dx)
+{
+  Real x0 = pos_x_dev[gtid];
+  Real y0 = pos_y_dev[gtid];
+  Real z0 = pos_z_dev[gtid];
+  // Brute force loop to see if particle is alone
+  for (int i = 0; i < n_local; i++) {
+    if (i == gtid) {
+      continue;
+    }
+    if (abs(x0 - pos_x_dev[i]) > dx) {
+      continue;
+    }
+    if (abs(y0 - pos_y_dev[i]) > dx) {
+      continue;
+    }
+    if (abs(z0 - pos_z_dev[i]) > dx) {
+      continue;
+    }
+    // If we made it here, something is too close.
+    return false;
+  }
+  return true;
+}
+
+__global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t* id, Real* pos_x_dev, Real* pos_y_dev,
+                                        Real* pos_z_dev, Real* mass_dev, Real* age_dev, Real xMin, Real yMin, Real zMin,
+                                        Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, int nx_g, int ny_g,
+                                        int nz_g, int n_ghost, Real t, Real dt, Real* dti, Real* info, Real* density,
+                                        Real* gasEnergy, Real* energy, Real* momentum_x, Real* momentum_y,
+                                        Real* momentum_z, Real gamma, FeedbackPrng* states, Real* prev_dens,
+                                        int* prev_N, short direction, Real* dev_snr, Real snr_dt, Real time_sn_start,
+                                        Real time_sn_end, int n_step, Real density_floor)
+{
+  __shared__ Real s_info[FEED_INFO_N * TPB_FEEDBACK];  // for collecting SN feedback information, like #
+                                                       // of SNe or # resolved.
+  int tid  = threadIdx.x;
+  int gtid = blockIdx.x * blockDim.x + tid;
+
+  s_info[FEED_INFO_N * tid]     = 0;  // number of supernovae
+  s_info[FEED_INFO_N * tid + 1] = 0;  // number of resolved events
+  s_info[FEED_INFO_N * tid + 2] = 0;  // number of unresolved events
+  s_info[FEED_INFO_N * tid + 3] = 0;  // resolved energy
+  s_info[FEED_INFO_N * tid + 4] = 0;  // unresolved momentum
+  s_info[FEED_INFO_N * tid + 5] = 0;  // unresolved KE added via momentum injection
+
+  if (gtid < n_local) {
+    Real pos_x, pos_y, pos_z;
+    Real cell_center_x, cell_center_y, cell_center_z;
+    Real delta_x, delta_y, delta_z;
+    Real x_frac, y_frac, z_frac;
+    Real px, py, pz, d;
+    // Real t_b, t_a, v_1, v_2, d_b, d_a, p_b, p_a, e;
+    Real feedback_energy = 0, feedback_density = 0, feedback_momentum = 0, n_0, shell_radius;
+    bool is_resolved = false;
+    Real dV          = dx * dy * dz;
+    Real local_dti   = 0.0;
+
+    pos_x = pos_x_dev[gtid];
+    pos_y = pos_y_dev[gtid];
+    pos_z = pos_z_dev[gtid];
+    // kernel_printf("(%d): pos:(%.4e, %.4e, %.4e)\n", gtid, pos_x, pos_y,
+    // pos_z); kernel_printf("(%d): MIN:(%.4e, %.4e, %.4e)\n", gtid, xMin, yMin,
+    // xMin);
+
+    bool in_local =
+        (pos_x >= xMin && pos_x < xMax) && (pos_y >= yMin && pos_y < yMax) && (pos_z >= zMin && pos_z < zMax);
+    if (!in_local) {
+      kernel_printf(
+          " Feedback GPU: Particle outside local domain [%f  %f  %f]  [%f %f] "
+          "[%f %f] [%f %f]\n ",
+          pos_x, pos_y, pos_z, xMin, xMax, yMin, yMax, zMin, zMax);
+    }
+
+    int indx_x = (int)floor((pos_x - xMin) / dx);
+    int indx_y = (int)floor((pos_y - yMin) / dy);
+    int indx_z = (int)floor((pos_z - zMin) / dz);
+    // kernel_printf("(%d): indx:(%d, %d, %d)\n", gtid, indx_x, indx_y, indx_z);
+
+    bool ignore = indx_x < 0 || indx_y < 0 || indx_z < 0 || indx_x >= nx_g - 2 * n_ghost ||
+                  indx_y >= ny_g - 2 * n_ghost || indx_z >= nz_g - 2 * n_ghost;
+    if (ignore) {
+      kernel_printf(
+          " Feedback GPU: Particle CIC index err [%f  %f  %f]  [%d %d %d] [%d "
+          "%d %d] \n ",
+          pos_x, pos_y, pos_z, indx_x, indx_y, indx_z, nx_g, ny_g, nz_g);
+    }
+
+    // Avoid overlap issues for now
+    // bool is_alone = Particle_Is_Alone(pos_x_dev, pos_y_dev, pos_z_dev,
+    // n_local,
+    //                                  gtid, 6 * dx);
+
+    if (!ignore && in_local) {
+      int N = 0;
+      // only calculate this if there will be SN feedback
+      if ((t - age_dev[gtid]) <= time_sn_end) {
+        if (direction == -1) {
+          N = -prev_N[gtid];
+        } else {
+          Real average_num_sn =
+              GetSNRate(t - age_dev[gtid], dev_snr, snr_dt, time_sn_start, time_sn_end) * mass_dev[gtid] * dt;
+
+          // N = (int) (average_num_sn + 0.5);
+
+          FeedbackPrng state;  // = states[0]; // load initial state
+
+          curand_init(42, 0, 0, &state);
+          unsigned long long skip = n_step * 10000 + id[gtid];
+          skipahead(skip, &state);  // provided by curand
+          // unsigned int debug_state = curand(&state);
+
+          N = (int)curand_poisson(&state, average_num_sn);
+
+          // states[gtid] = state; // don't write back to state, keep it
+          // pristine
+          prev_N[gtid] = N;
+        }
+        if (N != 0) {
+          mass_dev[gtid] -= N * supernova::MASS_PER_SN;
+          feedback_energy  = N * supernova::ENERGY_PER_SN / dV;
+          feedback_density = N * supernova::MASS_PER_SN / dV;
+          if (direction == -1) {
+            n_0 = prev_dens[gtid];
+          } else {
+            n_0             = GetAverageNumberDensity_CGS(density, indx_x, indx_y, indx_z, nx_g, ny_g, n_ghost);
+            prev_dens[gtid] = n_0;
+          }
+          // int devcount;
+          // cudaGetDeviceCount(&devcount);
+          // int devId;
+          // cudaGetDevice(&devId);
+          // kernel_printf("[%d: %d] N: %d, time: %.4e, dt: %.4e, e: %.4e, n_0:
+          // %.4e\n", devId, gtid, N, t, dt, feedback_energy, n_0);
+
+          feedback_momentum = direction * supernova::FINAL_MOMENTUM * pow(n_0, -0.17) * pow(fabsf(N), 0.93) / dV;
+          shell_radius      = supernova::R_SH * pow(n_0, -0.46) * pow(fabsf(N), 0.29);
+          is_resolved       = 3 * max(dx, max(dy, dz)) <= shell_radius;
+          if (!is_resolved) {
+            kernel_printf(
+                "UR[%f] at (%d, %d, %d)  id=%d, N=%d, shell_rad=%0.4e, "
+                "n_0=%0.4e\n",
+                t, indx_x + n_ghost, indx_y + n_ghost, indx_z + n_ghost, (int)id[gtid], N, shell_radius, n_0);
+          }
+
+          s_info[FEED_INFO_N * tid] = 1. * N;
+          if (is_resolved) {
+            s_info[FEED_INFO_N * tid + 1] = direction * 1.0;
+          } else {
+            s_info[FEED_INFO_N * tid + 2] = direction * 1.0;
+          }
+
+          int indx;
+
+          if (is_resolved) {  // if resolved inject energy and density
+            s_info[FEED_INFO_N * tid + 3] = feedback_energy * dV;
+
+            indx_x = (int)floor((pos_x - xMin - 0.5 * dx) / dx);
+            indx_y = (int)floor((pos_y - yMin - 0.5 * dy) / dy);
+            indx_z = (int)floor((pos_z - zMin - 0.5 * dz) / dz);
+
+            cell_center_x = xMin + indx_x * dx + 0.5 * dx;
+            cell_center_y = yMin + indx_y * dy + 0.5 * dy;
+            cell_center_z = zMin + indx_z * dz + 0.5 * dz;
+
+            delta_x = 1 - (pos_x - cell_center_x) / dx;
+            delta_y = 1 - (pos_y - cell_center_y) / dy;
+            delta_z = 1 - (pos_z - cell_center_z) / dz;
+            indx_x += n_ghost;
+            indx_y += n_ghost;
+            indx_z += n_ghost;
+
+            for (int i = 0; i < 2; i++) {
+              for (int j = 0; j < 2; j++) {
+                for (int k = 0; k < 2; k++) {
+                  indx = (indx_x + i) + (indx_y + j) * nx_g + (indx_z + k) * nx_g * ny_g;
+
+                  if (abs(momentum_x[indx] / density[indx]) >= C_L) {
+                    kernel_printf(
+                        "%d, Rb: (%d, %d, %d) vx = %.3e, d = %.3e, n_0 = "
+                        "%.3e\n",
+                        direction, indx_x + i, indx_y + j, indx_z + k,
+                        momentum_x[indx] / density[indx] * VELOCITY_UNIT * 1e-5,
+                        density[indx] * DENSITY_UNIT / 0.6 / MP, n_0);
+                  }
+                  if (abs(momentum_y[indx] / density[indx]) >= C_L) {
+                    kernel_printf(
+                        "%d, Rb: (%d, %d, %d) vy = %.3e, d = %.3e, n_0 = "
+                        "%.3e\n",
+                        direction, indx_x + i, indx_y + j, indx_z + k,
+                        momentum_y[indx] / density[indx] * VELOCITY_UNIT * 1e-5,
+                        density[indx] * DENSITY_UNIT / 0.6 / MP, n_0);
+                  }
+                  if (abs(momentum_z[indx] / density[indx]) >= C_L) {
+                    kernel_printf(
+                        "%d, Rb: (%d, %d, %d) vz = %.3e, d = %.3e, n_0 = "
+                        "%.3e\n",
+                        direction, indx_x + i, indx_y + j, indx_z + k,
+                        momentum_z[indx] / density[indx] * VELOCITY_UNIT * 1e-5,
+                        density[indx] * DENSITY_UNIT / 0.6 / MP, n_0);
+                  }
+
+                  // i_frac are the fractions of energy/density to be allocated
+                  // to each of the 8 cells.
+                  x_frac = i * (1 - delta_x) + (1 - i) * delta_x;
+                  y_frac = j * (1 - delta_y) + (1 - j) * delta_y;
+                  z_frac = k * (1 - delta_z) + (1 - k) * delta_z;
+
+                  atomicAdd(&density[indx], x_frac * y_frac * z_frac * feedback_density);
+                  atomicAdd(&gasEnergy[indx], x_frac * y_frac * z_frac * feedback_energy);
+                  atomicAdd(&energy[indx], x_frac * y_frac * z_frac * feedback_energy);
+
+                  if (abs(momentum_x[indx] / density[indx]) >= C_L) {
+                    kernel_printf(
+                        "%d, Ra: (%d, %d, %d) vx = %.3e, d = %.3e, n_0 = "
+                        "%.3e\n",
+                        direction, indx_x + i, indx_y + j, indx_z + k,
+                        momentum_x[indx] / density[indx] * VELOCITY_UNIT * 1e-5,
+                        density[indx] * DENSITY_UNIT / 0.6 / MP, n_0);
+                  }
+                  if (abs(momentum_y[indx] / density[indx]) >= C_L) {
+                    kernel_printf(
+                        "%d, Ra: (%d, %d, %d) vy = %.3e, d = %.3e, n_0 = "
+                        "%.3e\n",
+                        direction, indx_x + i, indx_y + j, indx_z + k,
+                        momentum_y[indx] / density[indx] * VELOCITY_UNIT * 1e-5,
+                        density[indx] * DENSITY_UNIT / 0.6 / MP, n_0);
+                  }
+                  if (abs(momentum_z[indx] / density[indx]) >= C_L) {
+                    kernel_printf(
+                        "%d, Ra: (%d, %d, %d) vz = %.3e, d = %.3e, n_0 = "
+                        "%.3e\n",
+                        direction, indx_x + i, indx_y + j, indx_z + k,
+                        momentum_z[indx] / density[indx] * VELOCITY_UNIT * 1e-5,
+                        density[indx] * DENSITY_UNIT / 0.6 / MP, n_0);
+                  }
+
+                  if (direction > 0) {
+                    local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z,
+                                                              energy, indx, dx, dy, dz, density_floor));
+                  }
+                }
+              }
+            }
+          } else {  // if not resolved, inject momentum and density
+            s_info[FEED_INFO_N * tid + 4] = feedback_momentum * dV;
+
+            delta_x = (pos_x - xMin - indx_x * dx) / dx;
+            delta_y = (pos_y - yMin - indx_y * dy) / dy;
+            delta_z = (pos_z - zMin - indx_z * dz) / dz;
+            // kernel_printf("(%d):indx:(%d, %d, %d)\n", gtid, indx_x, indx_y,
+            // indx_z); kernel_printf("(%d): pos:(%.4e, %.4e, %.4e), delta_x
+            // (%.2e, %.2e,
+            // %.2e)\n", gtid, pos_x, pos_y, pos_z, delta_x, delta_y, delta_z);
+
+            indx_x += n_ghost;
+            indx_y += n_ghost;
+            indx_z += n_ghost;
+
+            if (abs(feedback_momentum / feedback_density * VELOCITY_UNIT * 1e-5) >
+                40000) {  // injected speeds are greater than 4e4 km/s
+              kernel_printf("**** (%d, %d, %d) injected speeds are %.3e km/s\n", indx_x, indx_y, indx_z,
+                            feedback_momentum / feedback_density * VELOCITY_UNIT * 1e-5);
+            }
+            feedback_momentum /= sqrt(3.0);
+
+            for (int i = -1; i < 2; i++) {
+              for (int j = -1; j < 2; j++) {
+                for (int k = -1; k < 2; k++) {
+                  // index in array of conserved quantities
+                  indx = (indx_x + i) + (indx_y + j) * nx_g + (indx_z + k) * nx_g * ny_g;
+
+                  x_frac = D_Fr(i, delta_x) * Frac(j, delta_y) * Frac(k, delta_z);
+                  y_frac = Frac(i, delta_x) * D_Fr(j, delta_y) * Frac(k, delta_z);
+                  z_frac = Frac(i, delta_x) * Frac(j, delta_y) * D_Fr(k, delta_z);
+
+                  px = x_frac * feedback_momentum;
+                  py = y_frac * feedback_momentum;
+                  pz = z_frac * feedback_momentum;
+                  d  = (abs(x_frac) + abs(y_frac) + abs(z_frac)) / 6 * feedback_density +
+                      n_0 * supernova::MU * MP / DENSITY_UNIT;
+
+                  // d  = frac(i, delta_x) * frac(j, delta_y) * frac(k, delta_z)
+                  // * feedback_density; e  = frac(i, delta_x) * frac(j,
+                  // delta_y) * frac(k, delta_z) * feedback_energy;
+                  // kernel_printf("(%d, %d, %d): delta:(%.4e, %.4e, %.4e),
+                  // frac: %.4e\n", indx_x, indx_y, indx_z, delta_x, delta_y,
+                  // delta_z, frac(i, delta_x)*frac(j, delta_y)*frac(k,
+                  // delta_z)); kernel_printf("(%d, %d, %d):(%d SN) (i:%d, j:%d,
+                  // k:%d) before: %.4e\n", indx_x, indx_y, indx_z, N, i, j, k,
+                  // density[indx]*DENSITY_UNIT/0.6/MP);
+
+                  // v_1 = sqrt((momentum_x[indx]*momentum_x[indx] +
+                  // momentum_y[indx]*momentum_y[indx] +
+                  // momentum_z[indx]*momentum_z[indx])/density[indx]/density[indx])*VELOCITY_UNIT/1e5;
+                  // t_b = gasEnergy[indx]*ENERGY_UNIT*(gamma -
+                  // 1)/(density[indx]*DENSITY_UNIT/0.6/MP*KB); p_b =
+                  // sqrt(momentum_x[indx]*momentum_x[indx] +
+                  // momentum_y[indx]*momentum_y[indx] +
+                  // momentum_z[indx]*momentum_z[indx])*VELOCITY_UNIT/1e5; d_b =
+                  // density[indx]*DENSITY_UNIT/0.6/MP;
+
+                  if (abs(momentum_x[indx] / density[indx]) >= C_L) {
+                    kernel_printf(
+                        "%d, Ub: (%d, %d, %d) vx = %.3e, d = %.3e, n_0 = "
+                        "%.3e\n",
+                        direction, indx_x + i, indx_y + j, indx_z + k,
+                        momentum_x[indx] / density[indx] * VELOCITY_UNIT * 1e-5,
+                        density[indx] * DENSITY_UNIT / 0.6 / MP, n_0);
+                  }
+                  if (abs(momentum_y[indx] / density[indx]) >= C_L) {
+                    kernel_printf(
+                        "%d, Ub: (%d, %d, %d) vy = %.3e, d = %.3e, n_0 = "
+                        "%.3e\n",
+                        direction, indx_x + i, indx_y + j, indx_z + k,
+                        momentum_y[indx] / density[indx] * VELOCITY_UNIT * 1e-5,
+                        density[indx] * DENSITY_UNIT / 0.6 / MP, n_0);
+                  }
+                  if (abs(momentum_z[indx] / density[indx]) >= C_L) {
+                    kernel_printf(
+                        "%d, Ub: (%d, %d, %d) vz = %.3e, d = %.3e, n_0 = "
+                        "%.3e\n",
+                        direction, indx_x + i, indx_y + j, indx_z + k,
+                        momentum_z[indx] / density[indx] * VELOCITY_UNIT * 1e-5,
+                        density[indx] * DENSITY_UNIT / 0.6 / MP, n_0);
+                  }
+
+                  atomicAdd(&momentum_x[indx], px);
+                  atomicAdd(&momentum_y[indx], py);
+                  atomicAdd(&momentum_z[indx], pz);
+
+                  density[indx] = d;
+                  energy[indx]  = (momentum_x[indx] * momentum_x[indx] + momentum_y[indx] * momentum_y[indx] +
+                                  momentum_z[indx] * momentum_z[indx]) /
+                                     2 / density[indx] +
+                                 gasEnergy[indx];
+
+                  // atomicAdd(    &energy[indx], e );
+                  // atomicAdd(   &density[indx], d );
+
+                  s_info[FEED_INFO_N * tid + I_UNRES_ENERGY] +=
+                      direction * (px * px + py * py + pz * pz) / 2 / density[indx] * dV;
+
+                  if (abs(momentum_x[indx] / density[indx]) >= C_L) {
+                    kernel_printf(
+                        "%d, Ua: (%d, %d, %d) vx = %.3e, d = %.3e, n_0 = "
+                        "%.3e\n",
+                        direction, indx_x + i, indx_y + j, indx_z + k,
+                        momentum_x[indx] / density[indx] * VELOCITY_UNIT * 1e-5,
+                        density[indx] * DENSITY_UNIT / 0.6 / MP, n_0);
+                  }
+                  if (abs(momentum_y[indx] / density[indx]) >= C_L) {
+                    kernel_printf(
+                        "%d, Ua: (%d, %d, %d) vy = %.3e, d = %.3e, n_0 = "
+                        "%.3e\n",
+                        direction, indx_x + i, indx_y + j, indx_z + k,
+                        momentum_y[indx] / density[indx] * VELOCITY_UNIT * 1e-5,
+                        density[indx] * DENSITY_UNIT / 0.6 / MP, n_0);
+                  }
+                  if (abs(momentum_z[indx] / density[indx]) >= C_L) {
+                    kernel_printf(
+                        "%d, Ua: (%d, %d, %d) vz = %.3e, d = %.3e, n_0 = "
+                        "%.3e\n",
+                        direction, indx_x + i, indx_y + j, indx_z + k,
+                        momentum_z[indx] / density[indx] * VELOCITY_UNIT * 1e-5,
+                        density[indx] * DENSITY_UNIT / 0.6 / MP, n_0);
+                  }
+                  // gasEnergy[indx] = energy[indx] -
+                  // (momentum_x[indx]*momentum_x[indx] +
+                  // momentum_y[indx]*momentum_y[indx] +
+                  // momentum_z[indx]*momentum_z[indx])/2/density[indx]; v_2 =
+                  // sqrt((momentum_x[indx]*momentum_x[indx] +
+                  // momentum_y[indx]*momentum_y[indx] +
+                  // momentum_z[indx]*momentum_z[indx])/density[indx]/density[indx])
+                  // * VELOCITY_UNIT/1e5; t_a =
+                  // gasEnergy[indx]*ENERGY_UNIT*(gamma -
+                  // 1)/(density[indx]*DENSITY_UNIT/0.6/MP*KB); d_a =
+                  // density[indx]*DENSITY_UNIT/0.6/MP; p_a =
+                  // sqrt(momentum_x[indx]*momentum_x[indx] +
+                  // momentum_y[indx]*momentum_y[indx] +
+                  // momentum_z[indx]*momentum_z[indx])*VELOCITY_UNIT/1e5;
+
+                  // kernel_printf("(%d, %d, %d):(CM: %.2e, SN: %d) (i:%d, j:%d,
+                  // k:%d) v_1: %.5e v_2: %.5e   V_DIFF-> %.4f %%\n", indx_x,
+                  // indx_y, indx_z, mass_dev[gtid], N, i, j, k, v_1, v_2,
+                  // (v_2-v_1)/v_1*100); kernel_printf("   (%d, %d, %d):(%d SN)
+                  // (i:%d, j:%d, k:%d) T_b: %.5e T_a: %.5e   T_DIFF-> %.4f
+                  // %%\n", indx_x, indx_y, indx_z, N, i, j, k, t_b, t_a,
+                  // (t_a-t_b)/t_b*100); kernel_printf("      (%d, %d, %d):(%d
+                  // SN) (i:%d, j:%d, k:%d) d_b: %.5e d_a: %.5e   D_DIFF-> %.1f
+                  // %%\n", indx_x, indx_y, indx_z, N, i, j, k, d_b, d_a,
+                  // (d_a-d_b)/d_b*100); kernel_printf("         (%d, %d,
+                  // %d):(%d SN) (i:%d, j:%d, k:%d) p_b: %.5e p_a: %.5e P_DIFF->
+                  // %.4f
+                  // %%\n", indx_x, indx_y, indx_z, N, i, j, k, p_b, p_a,
+                  // (p_a-p_b)/p_b*100);
+
+                  if (direction > 0) {
+                    // kernel_printf("urs time:%.3e id:%d N:%d d:%.5e\n", t,
+                    // id[gtid], N, n_0);
+                    local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z,
+                                                              energy, indx, dx, dy, dz, density_floor));
+                  }
+                }
+              }
+            }
+          }
+          if (direction > 0) {
+            atomicMax(dti, local_dti);
+          }
+        }
+      }
+    }
+  }
+
+  __syncthreads();
+
+  // reduce the info from all the threads in the block
+  for (unsigned int s = blockDim.x / 2; s > 0; s >>= 1) {
+    if (tid < s) {
+      s_info[FEED_INFO_N * tid] += s_info[FEED_INFO_N * (tid + s)];
+      s_info[FEED_INFO_N * tid + 1] += s_info[FEED_INFO_N * (tid + s) + 1];
+      s_info[FEED_INFO_N * tid + 2] += s_info[FEED_INFO_N * (tid + s) + 2];
+      s_info[FEED_INFO_N * tid + 3] += s_info[FEED_INFO_N * (tid + s) + 3];
+      s_info[FEED_INFO_N * tid + 4] += s_info[FEED_INFO_N * (tid + s) + 4];
+      s_info[FEED_INFO_N * tid + 5] += s_info[FEED_INFO_N * (tid + s) + 5];
+    }
+    __syncthreads();
+  }
+
+  if (tid == 0) {
+    info[FEED_INFO_N * blockIdx.x]     = s_info[0];
+    info[FEED_INFO_N * blockIdx.x + 1] = s_info[1];
+    info[FEED_INFO_N * blockIdx.x + 2] = s_info[2];
+    info[FEED_INFO_N * blockIdx.x + 3] = s_info[3];
+    info[FEED_INFO_N * blockIdx.x + 4] = s_info[4];
+    info[FEED_INFO_N * blockIdx.x + 5] = s_info[5];
+  }
+}
+
+Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis)
+{
+  #ifdef CPU_TIME
+  G.Timer.Feedback.Start();
+  #endif
+
+  if (G.H.dt == 0) {
+    return 0.0;
+  }
+
+  /*
+  if (G.Particles.n_local > supernova::n_states) {
+    printf("ERROR: not enough cuRAND states (%ld) for %ld local particles\n",
+           supernova::n_states, G.Particles.n_local);
+    exit(-1);
+  }
+  */
+
+  Real h_dti = 0.0;
+  int direction, ngrid;
+  Real h_info[6] = {0, 0, 0, 0, 0, 0};
+  Real info[6];
+  Real *d_dti, *d_info;
+  // require d_prev_dens & d_prev_N in case we have to undo feedback if the time
+  // step is too large.
+  Real* d_prev_dens;
+  int* d_prev_N;
+
+  if (G.Particles.n_local > 0) {
+    GPU_Error_Check(cudaMalloc(&d_dti, sizeof(Real)));
+    GPU_Error_Check(cudaMemcpy(d_dti, &h_dti, sizeof(Real), cudaMemcpyHostToDevice));
+    GPU_Error_Check(cudaMalloc(&d_prev_dens, G.Particles.n_local * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc(&d_prev_N, G.Particles.n_local * sizeof(int)));
+    GPU_Error_Check(cudaMemset(d_prev_dens, 0, G.Particles.n_local * sizeof(Real)));
+    GPU_Error_Check(cudaMemset(d_prev_N, 0, G.Particles.n_local * sizeof(int)));
+
+    ngrid = std::ceil((1. * G.Particles.n_local) / TPB_FEEDBACK);
+    GPU_Error_Check(cudaMalloc((void**)&d_info, FEED_INFO_N * ngrid * sizeof(Real)));
+  }
+  // TODO: info collection and max dti calculation
+  // assumes ngrid is 1.  The reason being that reduction of
+  // d_info is currently done on each block.  Only the first block reduction
+  // is used
+
+  do {
+    direction = 1;
+    if (G.Particles.n_local > 0) {
+      hipLaunchKernelGGL(Cluster_Feedback_Kernel, ngrid, TPB_FEEDBACK, 0, 0, G.Particles.n_local,
+                         G.Particles.partIDs_dev, G.Particles.pos_x_dev, G.Particles.pos_y_dev, G.Particles.pos_z_dev,
+                         G.Particles.mass_dev, G.Particles.age_dev, G.H.xblocal, G.H.yblocal, G.H.zblocal,
+                         G.H.xblocal_max, G.H.yblocal_max, G.H.zblocal_max, G.H.dx, G.H.dy, G.H.dz, G.H.nx, G.H.ny,
+                         G.H.nz, G.H.n_ghost, G.H.t, G.H.dt, d_dti, d_info, G.C.d_density, G.C.d_GasEnergy,
+                         G.C.d_Energy, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, gama,
+                         supernova::randStates, d_prev_dens, d_prev_N, direction, dev_snr, snr_dt, time_sn_start,
+                         time_sn_end, G.H.n_step, G.H.density_floor);
+
+      GPU_Error_Check(cudaMemcpy(&h_dti, d_dti, sizeof(Real), cudaMemcpyDeviceToHost));
+    }
+
+  #ifdef MPI_CHOLLA
+    h_dti = ReduceRealMax(h_dti);
+    MPI_Barrier(world);
+  #endif  // MPI_CHOLLA
+
+    if (h_dti != 0 && (C_cfl / h_dti < G.H.dt)) {
+      // timestep too big: need to undo the last operation
+      direction = -1;
+      if (G.Particles.n_local > 0) {
+        hipLaunchKernelGGL(Cluster_Feedback_Kernel, ngrid, TPB_FEEDBACK, 0, 0, G.Particles.n_local,
+                           G.Particles.partIDs_dev, G.Particles.pos_x_dev, G.Particles.pos_y_dev, G.Particles.pos_z_dev,
+                           G.Particles.mass_dev, G.Particles.age_dev, G.H.xblocal, G.H.yblocal, G.H.zblocal,
+                           G.H.xblocal_max, G.H.yblocal_max, G.H.zblocal_max, G.H.dx, G.H.dy, G.H.dz, G.H.nx, G.H.ny,
+                           G.H.nz, G.H.n_ghost, G.H.t, G.H.dt, d_dti, d_info, G.C.d_density, G.C.d_GasEnergy,
+                           G.C.d_Energy, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, gama,
+                           supernova::randStates, d_prev_dens, d_prev_N, direction, dev_snr, snr_dt, time_sn_start,
+                           time_sn_end, G.H.n_step, G.H.density_floor);
+
+        GPU_Error_Check(cudaDeviceSynchronize());
+      }
+      G.H.dt = C_cfl / h_dti;
+    }
+
+  } while (direction == -1);
+
+  if (G.Particles.n_local > 0) {
+    GPU_Error_Check(cudaMemcpy(&h_info, d_info, FEED_INFO_N * sizeof(Real), cudaMemcpyDeviceToHost));
+    GPU_Error_Check(cudaFree(d_dti));
+    GPU_Error_Check(cudaFree(d_info));
+    GPU_Error_Check(cudaFree(d_prev_dens));
+    GPU_Error_Check(cudaFree(d_prev_N));
+  }
+
+  #ifdef MPI_CHOLLA
+  MPI_Reduce(&h_info, &info, FEED_INFO_N, MPI_CHREAL, MPI_SUM, root, world);
+  #else
+  info = h_info;
+  #endif
+
+  analysis.countSN += (int)info[supernova::SN];
+  analysis.countResolved += (int)info[supernova::RESOLVED];
+  analysis.countUnresolved += (int)info[supernova::NOT_RESOLVED];
+  analysis.totalEnergy += info[supernova::ENERGY];
+  analysis.totalMomentum += info[supernova::MOMENTUM];
+  analysis.totalUnresEnergy += info[supernova::UNRES_ENERGY];
+
+  Real resolved_ratio = 0.0;
+  if (info[supernova::RESOLVED] > 0 || info[supernova::NOT_RESOLVED] > 0) {
+    resolved_ratio = info[supernova::RESOLVED] / (info[supernova::RESOLVED] + info[supernova::NOT_RESOLVED]);
+  }
+  Real global_resolved_ratio = 0.0;
+  if (analysis.countResolved > 0 || analysis.countUnresolved > 0) {
+    global_resolved_ratio = (Real)(analysis.countResolved) / (Real)(analysis.countResolved + analysis.countUnresolved);
+  }
+
+  chprintf("iteration %d: number of SN: %d, ratio of resolved %.3e\n", G.H.n_step, (long)info[supernova::SN],
+           resolved_ratio);
+  chprintf(
+      "    this iteration: energy: %.5e erg.  momentum: %.5e S.M. km/s  "
+      "unres_energy: %.5e erg\n",
+      info[supernova::ENERGY] * MASS_UNIT * LENGTH_UNIT * LENGTH_UNIT / TIME_UNIT / TIME_UNIT,
+      info[supernova::MOMENTUM] * VELOCITY_UNIT / 1e5,
+      info[supernova::UNRES_ENERGY] * MASS_UNIT * LENGTH_UNIT * LENGTH_UNIT / TIME_UNIT / TIME_UNIT);
+  chprintf("    cummulative: #SN: %d, ratio of resolved (R: %d, UR: %d) = %.3e\n", (long)analysis.countSN,
+           (long)analysis.countResolved, (long)analysis.countUnresolved, global_resolved_ratio);
+  chprintf(
+      "    energy: %.5e erg.  Total momentum: %.5e S.M. km/s, Total unres "
+      "energy: %.5e\n",
+      analysis.totalEnergy * MASS_UNIT * LENGTH_UNIT * LENGTH_UNIT / TIME_UNIT / TIME_UNIT,
+      analysis.totalMomentum * VELOCITY_UNIT / 1e5,
+      analysis.totalUnresEnergy * MASS_UNIT * LENGTH_UNIT * LENGTH_UNIT / TIME_UNIT / TIME_UNIT);
+
+  #ifdef CPU_TIME
+  G.Timer.Feedback.End();
+  #endif
+
+  return h_dti;
+}
+
+#endif  // SUPERNOVA & PARTICLES_GPU & PARTICLE_IDS & PARTICLE_AGE
diff --git a/src/particles/gravity_CIC.cpp b/src/particles/gravity_CIC.cpp
index be28d06a1..495e7cf33 100644
--- a/src/particles/gravity_CIC.cpp
+++ b/src/particles/gravity_CIC.cpp
@@ -1,121 +1,121 @@
 #ifdef PARTICLES
 
-#include <stdio.h>
-#include <stdlib.h>
-#include "math.h"
-#include <iostream>
-#include "../global/global.h"
-#include "../grid/grid3D.h"
-#include "../io/io.h"
-#include "../particles/particles_3D.h"
-#include "../particles/density_CIC.h"
-#include "../model/disk_galaxy.h"
+  #include <stdio.h>
+  #include <stdlib.h>
 
+  #include <iostream>
 
-#ifdef PARALLEL_OMP
-#include "../utils/parallel_omp.h"
-#endif
+  #include "../global/global.h"
+  #include "../grid/grid3D.h"
+  #include "../io/io.h"
+  #include "../model/disk_galaxy.h"
+  #include "density_CIC.h"
+  #include "math.h"
+  #include "particles_3D.h"
 
-//Get the Gravitational Field from the potential: g=-gradient(potential)
-void Grid3D::Get_Gravity_Field_Particles(){
+  #ifdef PARALLEL_OMP
+    #include "../utils/parallel_omp.h"
+  #endif
 
+// Get the Gravitational Field from the potential: g=-gradient(potential)
+void Grid3D::Get_Gravity_Field_Particles()
+{
   #ifdef PARTICLES_CPU
-  
-  #ifdef GRAVITY_GPU
+
+    #ifdef GRAVITY_GPU
   Copy_Potential_From_GPU();
-  #endif
+    #endif
 
-  #ifndef PARALLEL_OMP
-  Get_Gravity_Field_Particles_function( 0, Particles.G.nz_local + 2*Particles.G.n_ghost_particles_grid);
-  #else
+    #ifndef PARALLEL_OMP
+  Get_Gravity_Field_Particles_function(0, Particles.G.nz_local + 2 * Particles.G.n_ghost_particles_grid);
+    #else
 
-  #pragma omp parallel num_threads( N_OMP_THREADS )
+      #pragma omp parallel num_threads(N_OMP_THREADS)
   {
     int omp_id, n_omp_procs;
     int g_start, g_end;
 
-    omp_id = omp_get_thread_num();
+    omp_id      = omp_get_thread_num();
     n_omp_procs = omp_get_num_threads();
 
-    Get_OMP_Grid_Indxs( Particles.G.nz_local + 2*Particles.G.n_ghost_particles_grid, N_OMP_THREADS, omp_id,  &g_start, &g_end );
+    Get_OMP_Grid_Indxs(Particles.G.nz_local + 2 * Particles.G.n_ghost_particles_grid, N_OMP_THREADS, omp_id, &g_start,
+                       &g_end);
 
-    Get_Gravity_Field_Particles_function( g_start, g_end);
+    Get_Gravity_Field_Particles_function(g_start, g_end);
   }
-  #endif//PARALLEL_OMP
-  #endif//PARTICLES_CPU
-
+    #endif  // PARALLEL_OMP
+  #endif    // PARTICLES_CPU
 
   #ifdef PARTICLES_GPU
-  Particles.Get_Gravity_Field_Particles_GPU( Grav.F.potential_h );
+  Particles.Get_Gravity_Field_Particles_GPU(Grav.F.potential_h);
   #endif
-
 }
 
-
-void Grid3D::Get_Gravity_CIC(){
-
+void Grid3D::Get_Gravity_CIC()
+{
   #ifdef PARTICLES_CPU
 
-  #ifndef PARALLEL_OMP
-  Get_Gravity_CIC_function( 0, Particles.n_local );
-  #else
+    #ifndef PARALLEL_OMP
+  Get_Gravity_CIC_function(0, Particles.n_local);
+    #else
 
-  #pragma omp parallel num_threads( N_OMP_THREADS )
+      #pragma omp parallel num_threads(N_OMP_THREADS)
   {
     int omp_id, n_omp_procs;
     part_int_t p_start, p_end;
 
-    omp_id = omp_get_thread_num();
+    omp_id      = omp_get_thread_num();
     n_omp_procs = omp_get_num_threads();
 
-    Get_OMP_Particles_Indxs( Particles.n_local, N_OMP_THREADS, omp_id,  &p_start, &p_end );
+    Get_OMP_Particles_Indxs(Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end);
 
-    Get_Gravity_CIC_function( p_start, p_end );
+    Get_Gravity_CIC_function(p_start, p_end);
   }
-  #endif//PARALLEL_OMP
-  #endif//PARTICLES_CPU
+    #endif  // PARALLEL_OMP
+  #endif    // PARTICLES_CPU
 
   #ifdef PARTICLES_GPU
   Particles.Get_Gravity_CIC_GPU();
   #endif
 }
 
-
-#ifdef PARTICLES_GPU
-void Particles_3D::Get_Gravity_Field_Particles_GPU( Real *potential_host ){
-
-  Get_Gravity_Field_Particles_GPU_function( G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, G.n_cells_potential, G.dx, G.dy, G.dz,  potential_host, G.potential_dev, G.gravity_x_dev, G.gravity_y_dev, G.gravity_z_dev  );
-
+  #ifdef PARTICLES_GPU
+void Particles3D::Get_Gravity_Field_Particles_GPU(Real *potential_host)
+{
+  Get_Gravity_Field_Particles_GPU_function(G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid,
+                                           G.n_cells_potential, G.dx, G.dy, G.dz, potential_host, G.potential_dev,
+                                           G.gravity_x_dev, G.gravity_y_dev, G.gravity_z_dev);
 }
 
-void Particles_3D::Get_Gravity_CIC_GPU(){
-
-  Get_Gravity_CIC_GPU_function( n_local, G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, G.xMin, G.xMax, G.yMin, G.yMax, G.zMin, G.zMax,  G.dx, G.dy, G.dz,  pos_x_dev, pos_y_dev, pos_z_dev, grav_x_dev,  grav_y_dev,  grav_z_dev, G.gravity_x_dev, G.gravity_y_dev, G.gravity_z_dev );
+void Particles3D::Get_Gravity_CIC_GPU()
+{
+  Get_Gravity_CIC_GPU_function(n_local, G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, G.xMin, G.xMax,
+                               G.yMin, G.yMax, G.zMin, G.zMax, G.dx, G.dy, G.dz, pos_x_dev, pos_y_dev, pos_z_dev,
+                               grav_x_dev, grav_y_dev, grav_z_dev, G.gravity_x_dev, G.gravity_y_dev, G.gravity_z_dev);
 }
 
-#endif //PARTICLES_GPU
-
-
-#ifdef PARTICLES_CPU
+  #endif  // PARTICLES_GPU
 
-//Compute the gradient of the potential
-void Grid3D::Get_Gravity_Field_Particles_function( int g_start, int g_end ){
+  #ifdef PARTICLES_CPU
 
+// Compute the gradient of the potential
+void Grid3D::Get_Gravity_Field_Particles_function(int g_start, int g_end)
+{
   int nx_grav, ny_grav, nz_grav, nGHST_grav;
   nGHST_grav = Particles.G.n_ghost_particles_grid;
-  nx_grav = Particles.G.nx_local + 2*nGHST_grav;
-  ny_grav = Particles.G.ny_local + 2*nGHST_grav;
-  nz_grav = Particles.G.nz_local + 2*nGHST_grav;
+  nx_grav    = Particles.G.nx_local + 2 * nGHST_grav;
+  ny_grav    = Particles.G.ny_local + 2 * nGHST_grav;
+  nz_grav    = Particles.G.nz_local + 2 * nGHST_grav;
 
   int nx_grid, ny_grid, nz_grid, nGHST_grid;
   Real *potential;
 
-  potential = Grav.F.potential_h;
+  potential  = Grav.F.potential_h;
   nGHST_grid = N_GHOST_POTENTIAL;
 
-  nx_grid = Grav.nx_local + 2*nGHST_grid;
-  ny_grid = Grav.ny_local + 2*nGHST_grid;
-  nz_grid = Grav.nz_local + 2*nGHST_grid;
+  nx_grid = Grav.nx_local + 2 * nGHST_grid;
+  ny_grid = Grav.ny_local + 2 * nGHST_grid;
+  nz_grid = Grav.nz_local + 2 * nGHST_grid;
 
   int nGHST = nGHST_grid - nGHST_grav;
 
@@ -124,96 +124,97 @@ void Grid3D::Get_Gravity_Field_Particles_function( int g_start, int g_end ){
   dy = Particles.G.dy;
   dz = Particles.G.dz;
 
-  #ifdef GRAVITY_5_POINTS_GRADIENT
+    #ifdef GRAVITY_5_POINTS_GRADIENT
   Real phi_ll, phi_rr;
   int id_ll, id_rr;
-  #endif
+    #endif
 
   Real phi_l, phi_r;
   int k, j, i, id_l, id_r, id;
-  for ( k=g_start; k<g_end; k++ ){
-    for ( j=0; j<ny_grav; j++ ){
-      for ( i=0; i<nx_grav; i++ ){
-        id   = (i) + (j)*nx_grav + (k)*ny_grav*nx_grav;
-        id_l = (i-1 + nGHST) + (j + nGHST)*nx_grid + (k + nGHST)*ny_grid*nx_grid;
-        id_r = (i+1 + nGHST) + (j + nGHST)*nx_grid + (k + nGHST)*ny_grid*nx_grid;
+  for (k = g_start; k < g_end; k++) {
+    for (j = 0; j < ny_grav; j++) {
+      for (i = 0; i < nx_grav; i++) {
+        id    = (i) + (j)*nx_grav + (k)*ny_grav * nx_grav;
+        id_l  = (i - 1 + nGHST) + (j + nGHST) * nx_grid + (k + nGHST) * ny_grid * nx_grid;
+        id_r  = (i + 1 + nGHST) + (j + nGHST) * nx_grid + (k + nGHST) * ny_grid * nx_grid;
         phi_l = potential[id_l];
         phi_r = potential[id_r];
-        #ifdef GRAVITY_5_POINTS_GRADIENT
-        id_ll = (i-2 + nGHST) + (j + nGHST)*nx_grid + (k + nGHST)*ny_grid*nx_grid;
-        id_rr = (i+2 + nGHST) + (j + nGHST)*nx_grid + (k + nGHST)*ny_grid*nx_grid;
-        phi_ll = potential[id_ll];
-        phi_rr = potential[id_rr];
-        Particles.G.gravity_x[id] = -1 * ( -phi_rr + 8*phi_r - 8*phi_l + phi_ll) / (12*dx);
-        #else
-        Particles.G.gravity_x[id] = -0.5 * ( phi_r - phi_l ) / dx;
-        #endif
+    #ifdef GRAVITY_5_POINTS_GRADIENT
+        id_ll                     = (i - 2 + nGHST) + (j + nGHST) * nx_grid + (k + nGHST) * ny_grid * nx_grid;
+        id_rr                     = (i + 2 + nGHST) + (j + nGHST) * nx_grid + (k + nGHST) * ny_grid * nx_grid;
+        phi_ll                    = potential[id_ll];
+        phi_rr                    = potential[id_rr];
+        Particles.G.gravity_x[id] = -1 * (-phi_rr + 8 * phi_r - 8 * phi_l + phi_ll) / (12 * dx);
+    #else
+        Particles.G.gravity_x[id] = -0.5 * (phi_r - phi_l) / dx;
+    #endif
       }
     }
   }
 
-  for ( k=g_start; k<g_end; k++ ){
-    for ( j=0; j<ny_grav; j++ ){
-      for ( i=0; i<nx_grav; i++ ){
-        id   = (i) + (j)*nx_grav + (k)*ny_grav*nx_grav;
-        id_l = (i + nGHST) + (j-1 + nGHST)*nx_grid + (k + nGHST)*ny_grid*nx_grid;
-        id_r = (i + nGHST) + (j+1 + nGHST)*nx_grid + (k + nGHST)*ny_grid*nx_grid;
+  for (k = g_start; k < g_end; k++) {
+    for (j = 0; j < ny_grav; j++) {
+      for (i = 0; i < nx_grav; i++) {
+        id    = (i) + (j)*nx_grav + (k)*ny_grav * nx_grav;
+        id_l  = (i + nGHST) + (j - 1 + nGHST) * nx_grid + (k + nGHST) * ny_grid * nx_grid;
+        id_r  = (i + nGHST) + (j + 1 + nGHST) * nx_grid + (k + nGHST) * ny_grid * nx_grid;
         phi_l = potential[id_l];
         phi_r = potential[id_r];
-        #ifdef GRAVITY_5_POINTS_GRADIENT
-        id_ll = (i + nGHST) + (j-2 + nGHST)*nx_grid + (k + nGHST)*ny_grid*nx_grid;
-        id_rr = (i + nGHST) + (j+2 + nGHST)*nx_grid + (k + nGHST)*ny_grid*nx_grid;
-        phi_ll = potential[id_ll];
-        phi_rr = potential[id_rr];
-        Particles.G.gravity_y[id] = -1 * ( -phi_rr + 8*phi_r - 8*phi_l + phi_ll) / (12*dy);
-        //if (i == 0) {
-        //  std::cout << "phi_ll[" << id_ll << "] = " << phi_ll << std::endl;
-        //}
-        #else
-        Particles.G.gravity_y[id] = -0.5 * ( phi_r - phi_l ) / dy;
-        #endif
+    #ifdef GRAVITY_5_POINTS_GRADIENT
+        id_ll                     = (i + nGHST) + (j - 2 + nGHST) * nx_grid + (k + nGHST) * ny_grid * nx_grid;
+        id_rr                     = (i + nGHST) + (j + 2 + nGHST) * nx_grid + (k + nGHST) * ny_grid * nx_grid;
+        phi_ll                    = potential[id_ll];
+        phi_rr                    = potential[id_rr];
+        Particles.G.gravity_y[id] = -1 * (-phi_rr + 8 * phi_r - 8 * phi_l + phi_ll) / (12 * dy);
+    // if (i == 0) {
+    //   std::cout << "phi_ll[" << id_ll << "] = " << phi_ll << std::endl;
+    // }
+    #else
+        Particles.G.gravity_y[id] = -0.5 * (phi_r - phi_l) / dy;
+    #endif
       }
     }
   }
 
-  for ( k=g_start; k<g_end; k++ ){
-    for ( j=0; j<ny_grav; j++ ){
-      for ( i=0; i<nx_grav; i++ ){
-        id   = (i) + (j)*nx_grav + (k)*ny_grav*nx_grav;
-        id_l = (i + nGHST) + (j + nGHST)*nx_grid + (k-1 + nGHST)*ny_grid*nx_grid;
-        id_r = (i + nGHST) + (j + nGHST)*nx_grid + (k+1 + nGHST)*ny_grid*nx_grid;
+  for (k = g_start; k < g_end; k++) {
+    for (j = 0; j < ny_grav; j++) {
+      for (i = 0; i < nx_grav; i++) {
+        id    = (i) + (j)*nx_grav + (k)*ny_grav * nx_grav;
+        id_l  = (i + nGHST) + (j + nGHST) * nx_grid + (k - 1 + nGHST) * ny_grid * nx_grid;
+        id_r  = (i + nGHST) + (j + nGHST) * nx_grid + (k + 1 + nGHST) * ny_grid * nx_grid;
         phi_l = potential[id_l];
         phi_r = potential[id_r];
-        #ifdef GRAVITY_5_POINTS_GRADIENT
-        id_ll = (i + nGHST) + (j + nGHST)*nx_grid + (k-2 + nGHST)*ny_grid*nx_grid;
-        id_rr = (i + nGHST) + (j + nGHST)*nx_grid + (k+2 + nGHST)*ny_grid*nx_grid;
-        phi_ll = potential[id_ll];
-        phi_rr = potential[id_rr];
-        Particles.G.gravity_z[id] = -1 * ( -phi_rr + 8*phi_r - 8*phi_l + phi_ll) / (12*dz);
-        #else
-        Particles.G.gravity_z[id] = -0.5 * ( phi_r - phi_l ) / dz;
-        #endif
+    #ifdef GRAVITY_5_POINTS_GRADIENT
+        id_ll                     = (i + nGHST) + (j + nGHST) * nx_grid + (k - 2 + nGHST) * ny_grid * nx_grid;
+        id_rr                     = (i + nGHST) + (j + nGHST) * nx_grid + (k + 2 + nGHST) * ny_grid * nx_grid;
+        phi_ll                    = potential[id_ll];
+        phi_rr                    = potential[id_rr];
+        Particles.G.gravity_z[id] = -1 * (-phi_rr + 8 * phi_r - 8 * phi_l + phi_ll) / (12 * dz);
+    #else
+        Particles.G.gravity_z[id] = -0.5 * (phi_r - phi_l) / dz;
+    #endif
       }
     }
   }
 }
 
-//Get the CIC interpolation of the Gravitational field at the particles positions
-void Grid3D::Get_Gravity_CIC_function( part_int_t p_start, part_int_t p_end ){
-
+// Get the CIC interpolation of the Gravitational field at the particles
+// positions
+void Grid3D::Get_Gravity_CIC_function(part_int_t p_start, part_int_t p_end)
+{
   int nx_g, ny_g, nz_g, nGHST;
   nGHST = Particles.G.n_ghost_particles_grid;
-  nx_g = Particles.G.nx_local + 2*nGHST;
-  ny_g = Particles.G.ny_local + 2*nGHST;
-  nz_g = Particles.G.nz_local + 2*nGHST;
+  nx_g  = Particles.G.nx_local + 2 * nGHST;
+  ny_g  = Particles.G.ny_local + 2 * nGHST;
+  nz_g  = Particles.G.nz_local + 2 * nGHST;
 
   Real xMin, yMin, zMin, dx, dy, dz;
   xMin = Particles.G.xMin;
   yMin = Particles.G.yMin;
   zMin = Particles.G.zMin;
-  dx = Particles.G.dx;
-  dy = Particles.G.dy;
-  dz = Particles.G.dz;
+  dx   = Particles.G.dx;
+  dy   = Particles.G.dy;
+  dz   = Particles.G.dz;
 
   part_int_t pIndx;
   int indx_x, indx_y, indx_z, indx;
@@ -225,33 +226,33 @@ void Grid3D::Get_Gravity_CIC_function( part_int_t p_start, part_int_t p_end ){
   Real g_z_bl, g_z_br, g_z_bu, g_z_bru, g_z_tl, g_z_tr, g_z_tu, g_z_tru;
   Real g_x, g_y, g_z;
   bool ignore, in_local;
-  for ( pIndx=p_start; pIndx < p_end; pIndx++ ){
-    ignore = false;
+  for (pIndx = p_start; pIndx < p_end; pIndx++) {
+    ignore   = false;
     in_local = true;
     // pMass = Particles.mass[pIndx] * dV_inv;
     x_pos = Particles.pos_x[pIndx];
     y_pos = Particles.pos_y[pIndx];
     z_pos = Particles.pos_z[pIndx];
-    Get_Indexes_CIC( xMin, yMin, zMin, dx, dy, dz, x_pos, y_pos, z_pos, indx_x, indx_y, indx_z );
-    if ( indx_x < -1 ) ignore = true;
-    if ( indx_y < -1 ) ignore = true;
-    if ( indx_z < -1 ) ignore = true;
-    if ( indx_x > nx_g-3  ) ignore = true;
-    if ( indx_y > ny_g-3  ) ignore = true;
-    if ( indx_y > nz_g-3  ) ignore = true;
-    if ( x_pos < Particles.G.xMin || x_pos >= Particles.G.xMax ) in_local = false;
-    if ( y_pos < Particles.G.yMin || y_pos >= Particles.G.yMax ) in_local = false;
-    if ( z_pos < Particles.G.zMin || z_pos >= Particles.G.zMax ) in_local = false;
-    if ( ! in_local  ) {
+    Get_Indexes_CIC(xMin, yMin, zMin, dx, dy, dz, x_pos, y_pos, z_pos, indx_x, indx_y, indx_z);
+    if (indx_x < -1) ignore = true;
+    if (indx_y < -1) ignore = true;
+    if (indx_z < -1) ignore = true;
+    if (indx_x > nx_g - 3) ignore = true;
+    if (indx_y > ny_g - 3) ignore = true;
+    if (indx_y > nz_g - 3) ignore = true;
+    if (x_pos < Particles.G.xMin || x_pos >= Particles.G.xMax) in_local = false;
+    if (y_pos < Particles.G.yMin || y_pos >= Particles.G.yMax) in_local = false;
+    if (z_pos < Particles.G.zMin || z_pos >= Particles.G.zMax) in_local = false;
+    if (!in_local) {
       std::cout << " Gravity CIC Error:" << std::endl;
-      #ifdef PARTICLE_IDS
+    #ifdef PARTICLE_IDS
       std::cout << " Particle outside Local  domain    pID: " << Particles.partIDs[pIndx] << std::endl;
-      #else
+    #else
       std::cout << " Particle outside Local  domain " << std::endl;
-      #endif
-      std::cout << "  Domain X: " << Particles.G.xMin <<  "  " << Particles.G.xMax << std::endl;
-      std::cout << "  Domain Y: " << Particles.G.yMin <<  "  " << Particles.G.yMax << std::endl;
-      std::cout << "  Domain Z: " << Particles.G.zMin <<  "  " << Particles.G.zMax << std::endl;
+    #endif
+      std::cout << "  Domain X: " << Particles.G.xMin << "  " << Particles.G.xMax << std::endl;
+      std::cout << "  Domain Y: " << Particles.G.yMin << "  " << Particles.G.yMax << std::endl;
+      std::cout << "  Domain Z: " << Particles.G.zMin << "  " << Particles.G.zMax << std::endl;
       std::cout << "  Particle X: " << x_pos << std::endl;
       std::cout << "  Particle Y: " << y_pos << std::endl;
       std::cout << "  Particle Z: " << z_pos << std::endl;
@@ -260,12 +261,12 @@ void Grid3D::Get_Gravity_CIC_function( part_int_t p_start, part_int_t p_end ){
       // Particles.grav_z[pIndx] = 0;
       continue;
     }
-    if ( ignore ){
-      #ifdef PARTICLE_IDS
+    if (ignore) {
+    #ifdef PARTICLE_IDS
       std::cout << "ERROR GRAVITY_CIC Index    pID: " << Particles.partIDs[pIndx] << std::endl;
-      #else
+    #else
       std::cout << "ERROR GRAVITY_CIC Index " << std::endl;
-      #endif
+    #endif
       std::cout << "Negative xIndx: " << x_pos << "  " << indx_x << std::endl;
       std::cout << "Negative zIndx: " << z_pos << "  " << indx_z << std::endl;
       std::cout << "Negative yIndx: " << y_pos << "  " << indx_y << std::endl;
@@ -276,70 +277,70 @@ void Grid3D::Get_Gravity_CIC_function( part_int_t p_start, part_int_t p_end ){
       continue;
     }
 
-    cell_center_x = xMin + indx_x*dx + 0.5*dx;
-    cell_center_y = yMin + indx_y*dy + 0.5*dy;
-    cell_center_z = zMin + indx_z*dz + 0.5*dz;
-    delta_x = 1 - ( x_pos - cell_center_x ) / dx;
-    delta_y = 1 - ( y_pos - cell_center_y ) / dy;
-    delta_z = 1 - ( z_pos - cell_center_z ) / dz;
+    cell_center_x = xMin + indx_x * dx + 0.5 * dx;
+    cell_center_y = yMin + indx_y * dy + 0.5 * dy;
+    cell_center_z = zMin + indx_z * dz + 0.5 * dz;
+    delta_x       = 1 - (x_pos - cell_center_x) / dx;
+    delta_y       = 1 - (y_pos - cell_center_y) / dy;
+    delta_z       = 1 - (z_pos - cell_center_z) / dz;
     indx_x += nGHST;
     indx_y += nGHST;
     indx_z += nGHST;
 
-    indx = indx_x + indx_y*nx_g + indx_z*nx_g*ny_g;
+    indx   = indx_x + indx_y * nx_g + indx_z * nx_g * ny_g;
     g_x_bl = Particles.G.gravity_x[indx];
     g_y_bl = Particles.G.gravity_y[indx];
     g_z_bl = Particles.G.gravity_z[indx];
 
-    indx = (indx_x+1) + (indx_y)*nx_g + (indx_z)*nx_g*ny_g;
+    indx   = (indx_x + 1) + (indx_y)*nx_g + (indx_z)*nx_g * ny_g;
     g_x_br = Particles.G.gravity_x[indx];
     g_y_br = Particles.G.gravity_y[indx];
     g_z_br = Particles.G.gravity_z[indx];
 
-    indx = (indx_x) + (indx_y+1)*nx_g + (indx_z)*nx_g*ny_g;
+    indx   = (indx_x) + (indx_y + 1) * nx_g + (indx_z)*nx_g * ny_g;
     g_x_bu = Particles.G.gravity_x[indx];
     g_y_bu = Particles.G.gravity_y[indx];
     g_z_bu = Particles.G.gravity_z[indx];
 
-    indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z)*nx_g*ny_g;
+    indx    = (indx_x + 1) + (indx_y + 1) * nx_g + (indx_z)*nx_g * ny_g;
     g_x_bru = Particles.G.gravity_x[indx];
     g_y_bru = Particles.G.gravity_y[indx];
     g_z_bru = Particles.G.gravity_z[indx];
 
-    indx = (indx_x) + (indx_y)*nx_g + (indx_z+1)*nx_g*ny_g;
+    indx   = (indx_x) + (indx_y)*nx_g + (indx_z + 1) * nx_g * ny_g;
     g_x_tl = Particles.G.gravity_x[indx];
     g_y_tl = Particles.G.gravity_y[indx];
     g_z_tl = Particles.G.gravity_z[indx];
 
-    indx = (indx_x+1) + (indx_y)*nx_g + (indx_z+1)*nx_g*ny_g;
+    indx   = (indx_x + 1) + (indx_y)*nx_g + (indx_z + 1) * nx_g * ny_g;
     g_x_tr = Particles.G.gravity_x[indx];
     g_y_tr = Particles.G.gravity_y[indx];
     g_z_tr = Particles.G.gravity_z[indx];
 
-    indx = (indx_x) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g;
+    indx   = (indx_x) + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g;
     g_x_tu = Particles.G.gravity_x[indx];
     g_y_tu = Particles.G.gravity_y[indx];
     g_z_tu = Particles.G.gravity_z[indx];
 
-    indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g;
+    indx    = (indx_x + 1) + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g;
     g_x_tru = Particles.G.gravity_x[indx];
     g_y_tru = Particles.G.gravity_y[indx];
     g_z_tru = Particles.G.gravity_z[indx];
 
-    g_x = g_x_bl*(delta_x)*(delta_y)*(delta_z)     + g_x_br*(1-delta_x)*(delta_y)*(delta_z) +
-          g_x_bu*(delta_x)*(1-delta_y)*(delta_z  ) + g_x_bru*(1-delta_x)*(1-delta_y)*(delta_z) +
-          g_x_tl*(delta_x)*(delta_y)*(1-delta_z)   + g_x_tr*(1-delta_x)*(delta_y)*(1-delta_z) +
-          g_x_tu*(delta_x)*(1-delta_y)*(1-delta_z) + g_x_tru*(1-delta_x)*(1-delta_y)*(1-delta_z);
+    g_x = g_x_bl * (delta_x) * (delta_y) * (delta_z) + g_x_br * (1 - delta_x) * (delta_y) * (delta_z) +
+          g_x_bu * (delta_x) * (1 - delta_y) * (delta_z) + g_x_bru * (1 - delta_x) * (1 - delta_y) * (delta_z) +
+          g_x_tl * (delta_x) * (delta_y) * (1 - delta_z) + g_x_tr * (1 - delta_x) * (delta_y) * (1 - delta_z) +
+          g_x_tu * (delta_x) * (1 - delta_y) * (1 - delta_z) + g_x_tru * (1 - delta_x) * (1 - delta_y) * (1 - delta_z);
 
-    g_y = g_y_bl*(delta_x)*(delta_y)*(delta_z)     + g_y_br*(1-delta_x)*(delta_y)*(delta_z) +
-          g_y_bu*(delta_x)*(1-delta_y)*(delta_z)   + g_y_bru*(1-delta_x)*(1-delta_y)*(delta_z) +
-          g_y_tl*(delta_x)*(delta_y)*(1-delta_z)   + g_y_tr*(1-delta_x)*(delta_y)*(1-delta_z) +
-          g_y_tu*(delta_x)*(1-delta_y)*(1-delta_z) + g_y_tru*(1-delta_x)*(1-delta_y)*(1-delta_z);
+    g_y = g_y_bl * (delta_x) * (delta_y) * (delta_z) + g_y_br * (1 - delta_x) * (delta_y) * (delta_z) +
+          g_y_bu * (delta_x) * (1 - delta_y) * (delta_z) + g_y_bru * (1 - delta_x) * (1 - delta_y) * (delta_z) +
+          g_y_tl * (delta_x) * (delta_y) * (1 - delta_z) + g_y_tr * (1 - delta_x) * (delta_y) * (1 - delta_z) +
+          g_y_tu * (delta_x) * (1 - delta_y) * (1 - delta_z) + g_y_tru * (1 - delta_x) * (1 - delta_y) * (1 - delta_z);
 
-    g_z = g_z_bl*(delta_x)*(delta_y)*(delta_z)     + g_z_br*(1-delta_x)*(delta_y)*(delta_z) +
-          g_z_bu*(delta_x)*(1-delta_y)*(delta_z)   + g_z_bru*(1-delta_x)*(1-delta_y)*(delta_z) +
-          g_z_tl*(delta_x)*(delta_y)*(1-delta_z)   + g_z_tr*(1-delta_x)*(delta_y)*(1-delta_z) +
-          g_z_tu*(delta_x)*(1-delta_y)*(1-delta_z) + g_z_tru*(1-delta_x)*(1-delta_y)*(1-delta_z);
+    g_z = g_z_bl * (delta_x) * (delta_y) * (delta_z) + g_z_br * (1 - delta_x) * (delta_y) * (delta_z) +
+          g_z_bu * (delta_x) * (1 - delta_y) * (delta_z) + g_z_bru * (1 - delta_x) * (1 - delta_y) * (delta_z) +
+          g_z_tl * (delta_x) * (delta_y) * (1 - delta_z) + g_z_tr * (1 - delta_x) * (delta_y) * (1 - delta_z) +
+          g_z_tu * (delta_x) * (1 - delta_y) * (1 - delta_z) + g_z_tru * (1 - delta_x) * (1 - delta_y) * (1 - delta_z);
 
     Particles.grav_x[pIndx] = g_x;
     Particles.grav_y[pIndx] = g_y;
@@ -347,8 +348,6 @@ void Grid3D::Get_Gravity_CIC_function( part_int_t p_start, part_int_t p_end ){
   }
 }
 
+  #endif  // PARTICLES_CPU
 
-
-#endif //PARTICLES_CPU
-
-#endif//PARTICLES
+#endif  // PARTICLES
diff --git a/src/particles/gravity_CIC_gpu.cu b/src/particles/gravity_CIC_gpu.cu
index f1466e332..4711b1a32 100644
--- a/src/particles/gravity_CIC_gpu.cu
+++ b/src/particles/gravity_CIC_gpu.cu
@@ -1,151 +1,166 @@
 #ifdef PARTICLES
 
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include "../utils/gpu.hpp"
-#include "../global/global.h"
-#include "../global/global_cuda.h"
-#include "../particles/particles_3D.h"
-
-#ifdef GRAVITY_GPU
-#include "../grid/grid3D.h"
-#endif
-
-#ifdef PARTICLES_GPU
-
-//Copy the potential from host to device
-void Particles_3D::Copy_Potential_To_GPU( Real *potential_host, Real *potential_dev, int n_cells_potential ){
-  CudaSafeCall( cudaMemcpy( potential_dev, potential_host, n_cells_potential*sizeof(Real), cudaMemcpyHostToDevice) );
-}
+  #include <math.h>
+  #include <stdio.h>
+  #include <stdlib.h>
+  #include <unistd.h>
+
+  #include "../global/global.h"
+  #include "../global/global_cuda.h"
+  #include "../utils/gpu.hpp"
+  #include "particles_3D.h"
+
+  #ifdef GRAVITY_GPU
+    #include "../grid/grid3D.h"
+  #endif
 
+  #ifdef PARTICLES_GPU
 
-//Kernel to compute the gradient of the potential
-__global__ void Get_Gravity_Field_Particles_Kernel(  Real *potential_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev, int nx, int ny, int nz, int n_ghost_particles_grid, int n_ghost_potential, Real dx, Real dy, Real dz ){
+// Copy the potential from host to device
+void Particles3D::Copy_Potential_To_GPU(Real *potential_host, Real *potential_dev, int n_cells_potential)
+{
+  GPU_Error_Check(cudaMemcpy(potential_dev, potential_host, n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice));
+}
 
+// Kernel to compute the gradient of the potential
+__global__ void Get_Gravity_Field_Particles_Kernel(Real *potential_dev, Real *gravity_x_dev, Real *gravity_y_dev,
+                                                   Real *gravity_z_dev, int nx, int ny, int nz,
+                                                   int n_ghost_particles_grid, int n_ghost_potential, Real dx, Real dy,
+                                                   Real dz)
+{
   int tid_x = blockIdx.x * blockDim.x + threadIdx.x;
   int tid_y = blockIdx.y * blockDim.y + threadIdx.y;
   int tid_z = blockIdx.z * blockDim.z + threadIdx.z;
 
   int nx_grav, ny_grav, nz_grav;
-  nx_grav = nx + 2*n_ghost_particles_grid;
-  ny_grav = ny + 2*n_ghost_particles_grid;
-  nz_grav = nz + 2*n_ghost_particles_grid;
+  nx_grav = nx + 2 * n_ghost_particles_grid;
+  ny_grav = ny + 2 * n_ghost_particles_grid;
+  nz_grav = nz + 2 * n_ghost_particles_grid;
 
-  if (tid_x >= nx_grav || tid_y >= ny_grav || tid_z >= nz_grav ) return;
-  int tid = tid_x + tid_y*nx_grav + tid_z*nx_grav*ny_grav;
+  if (tid_x >= nx_grav || tid_y >= ny_grav || tid_z >= nz_grav) {
+    return;
+  }
+  int tid = tid_x + tid_y * nx_grav + tid_z * nx_grav * ny_grav;
 
   int nx_pot, ny_pot;
-  nx_pot = nx + 2*n_ghost_potential;
-  ny_pot = ny + 2*n_ghost_potential;
-
-  // if (tid == 0) printf( "potential: %f\n", potential_dev[tid]  );
+  nx_pot = nx + 2 * n_ghost_potential;
+  ny_pot = ny + 2 * n_ghost_potential;
 
   int nGHST = n_ghost_potential - n_ghost_particles_grid;
 
   Real phi_l, phi_r;
   int id_l, id_r;
-  #ifdef GRAVITY_5_POINTS_GRADIENT
+    #ifdef GRAVITY_5_POINTS_GRADIENT
   Real phi_ll, phi_rr;
   int id_ll, id_rr;
-  #endif
+    #endif
 
   // Get Potential Gradient X
-  id_l = (tid_x-1 + nGHST) + (tid_y + nGHST)*nx_pot + (tid_z + nGHST)*ny_pot*nx_pot;
-  id_r = (tid_x+1 + nGHST) + (tid_y + nGHST)*nx_pot + (tid_z + nGHST)*ny_pot*nx_pot;
+  id_l  = (tid_x - 1 + nGHST) + (tid_y + nGHST) * nx_pot + (tid_z + nGHST) * ny_pot * nx_pot;
+  id_r  = (tid_x + 1 + nGHST) + (tid_y + nGHST) * nx_pot + (tid_z + nGHST) * ny_pot * nx_pot;
   phi_l = potential_dev[id_l];
   phi_r = potential_dev[id_r];
-  #ifdef GRAVITY_5_POINTS_GRADIENT
-  id_ll = (tid_x-2 + nGHST) + (tid_y + nGHST)*nx_pot + (tid_z + nGHST)*ny_pot*nx_pot;
-  id_rr = (tid_x+2 + nGHST) + (tid_y + nGHST)*nx_pot + (tid_z + nGHST)*ny_pot*nx_pot;
-  phi_ll = potential_dev[id_ll];
-  phi_rr = potential_dev[id_rr];
-  gravity_x_dev[tid] = -1 * ( -phi_rr + 8*phi_r - 8*phi_l + phi_ll) / (12*dx);
-  #else
-  gravity_x_dev[tid] = -0.5 * ( phi_r - phi_l ) / dx;
-  #endif
+    #ifdef GRAVITY_5_POINTS_GRADIENT
+  id_ll              = (tid_x - 2 + nGHST) + (tid_y + nGHST) * nx_pot + (tid_z + nGHST) * ny_pot * nx_pot;
+  id_rr              = (tid_x + 2 + nGHST) + (tid_y + nGHST) * nx_pot + (tid_z + nGHST) * ny_pot * nx_pot;
+  phi_ll             = potential_dev[id_ll];
+  phi_rr             = potential_dev[id_rr];
+  gravity_x_dev[tid] = -1 * (-phi_rr + 8 * phi_r - 8 * phi_l + phi_ll) / (12 * dx);
+    #else
+  gravity_x_dev[tid] = -0.5 * (phi_r - phi_l) / dx;
+    #endif
 
   // Get Potential Gradient Y
-  id_l = (tid_x + nGHST) + (tid_y-1 + nGHST)*nx_pot + (tid_z + nGHST)*ny_pot*nx_pot;
-  id_r = (tid_x + nGHST) + (tid_y+1 + nGHST)*nx_pot + (tid_z + nGHST)*ny_pot*nx_pot;
+  id_l  = (tid_x + nGHST) + (tid_y - 1 + nGHST) * nx_pot + (tid_z + nGHST) * ny_pot * nx_pot;
+  id_r  = (tid_x + nGHST) + (tid_y + 1 + nGHST) * nx_pot + (tid_z + nGHST) * ny_pot * nx_pot;
   phi_l = potential_dev[id_l];
   phi_r = potential_dev[id_r];
-  #ifdef GRAVITY_5_POINTS_GRADIENT
-  id_ll = (tid_x + nGHST) + (tid_y-2 + nGHST)*nx_pot + (tid_z + nGHST)*ny_pot*nx_pot;
-  id_rr = (tid_x + nGHST) + (tid_y+2 + nGHST)*nx_pot + (tid_z + nGHST)*ny_pot*nx_pot;
-  phi_ll = potential_dev[id_ll];
-  phi_rr = potential_dev[id_rr];
-  gravity_y_dev[tid] = -1 * ( -phi_rr + 8*phi_r - 8*phi_l + phi_ll) / (12*dy);
-  #else
-  gravity_y_dev[tid] = -0.5 * ( phi_r - phi_l ) / dy;
-  #endif
+    #ifdef GRAVITY_5_POINTS_GRADIENT
+  id_ll              = (tid_x + nGHST) + (tid_y - 2 + nGHST) * nx_pot + (tid_z + nGHST) * ny_pot * nx_pot;
+  id_rr              = (tid_x + nGHST) + (tid_y + 2 + nGHST) * nx_pot + (tid_z + nGHST) * ny_pot * nx_pot;
+  phi_ll             = potential_dev[id_ll];
+  phi_rr             = potential_dev[id_rr];
+  gravity_y_dev[tid] = -1 * (-phi_rr + 8 * phi_r - 8 * phi_l + phi_ll) / (12 * dy);
+    #else
+  gravity_y_dev[tid] = -0.5 * (phi_r - phi_l) / dy;
+    #endif
 
   // Get Potential Gradient Z
-  id_l = (tid_x + nGHST) + (tid_y + nGHST)*nx_pot + (tid_z-1 + nGHST)*ny_pot*nx_pot;
-  id_r = (tid_x + nGHST) + (tid_y + nGHST)*nx_pot + (tid_z+1 + nGHST)*ny_pot*nx_pot;
+  id_l  = (tid_x + nGHST) + (tid_y + nGHST) * nx_pot + (tid_z - 1 + nGHST) * ny_pot * nx_pot;
+  id_r  = (tid_x + nGHST) + (tid_y + nGHST) * nx_pot + (tid_z + 1 + nGHST) * ny_pot * nx_pot;
   phi_l = potential_dev[id_l];
   phi_r = potential_dev[id_r];
-  #ifdef GRAVITY_5_POINTS_GRADIENT
-  id_ll = (tid_x + nGHST) + (tid_y + nGHST)*nx_pot + (tid_z-2 + nGHST)*ny_pot*nx_pot;
-  id_rr = (tid_x + nGHST) + (tid_y + nGHST)*nx_pot + (tid_z+2 + nGHST)*ny_pot*nx_pot;
-  phi_ll = potential_dev[id_ll];
-  phi_rr = potential_dev[id_rr];
-  gravity_z_dev[tid] = -1 * ( -phi_rr + 8*phi_r - 8*phi_l + phi_ll) / (12*dz);
-  #else
-  gravity_z_dev[tid] = -0.5 * ( phi_r - phi_l ) / dz;
-  #endif
-
+    #ifdef GRAVITY_5_POINTS_GRADIENT
+  id_ll              = (tid_x + nGHST) + (tid_y + nGHST) * nx_pot + (tid_z - 2 + nGHST) * ny_pot * nx_pot;
+  id_rr              = (tid_x + nGHST) + (tid_y + nGHST) * nx_pot + (tid_z + 2 + nGHST) * ny_pot * nx_pot;
+  phi_ll             = potential_dev[id_ll];
+  phi_rr             = potential_dev[id_rr];
+  gravity_z_dev[tid] = -1 * (-phi_rr + 8 * phi_r - 8 * phi_l + phi_ll) / (12 * dz);
+    #else
+  gravity_z_dev[tid] = -0.5 * (phi_r - phi_l) / dz;
+    #endif
 }
 
-
-//Call the kernel to compute the gradient of the potential
-void Particles_3D::Get_Gravity_Field_Particles_GPU_function( int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, int n_cells_potential, Real dx, Real dy, Real dz,  Real *potential_host, Real *potential_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev  ){
-
-  #ifndef GRAVITY_GPU
-  Copy_Potential_To_GPU( potential_host, potential_dev, n_cells_potential );
-  #endif
+// Call the kernel to compute the gradient of the potential
+void Particles3D::Get_Gravity_Field_Particles_GPU_function(int nx_local, int ny_local, int nz_local,
+                                                           int n_ghost_particles_grid, int n_cells_potential, Real dx,
+                                                           Real dy, Real dz, Real *potential_host, Real *potential_dev,
+                                                           Real *gravity_x_dev, Real *gravity_y_dev,
+                                                           Real *gravity_z_dev)
+{
+    #ifndef GRAVITY_GPU
+  Copy_Potential_To_GPU(potential_host, potential_dev, n_cells_potential);
+    #endif
 
   int nx_g, ny_g, nz_g;
-  nx_g = nx_local + 2*N_GHOST_POTENTIAL;
-  ny_g = ny_local + 2*N_GHOST_POTENTIAL;
-  nz_g = nz_local + 2*N_GHOST_POTENTIAL;
+  nx_g = nx_local + 2 * N_GHOST_POTENTIAL;
+  ny_g = ny_local + 2 * N_GHOST_POTENTIAL;
+  nz_g = nz_local + 2 * N_GHOST_POTENTIAL;
 
   // set values for GPU kernels
-  int tpb_x = 8;
-  int tpb_y = 8;
-  int tpb_z = 8;
-  int ngrid_x =  (nx_g + tpb_x - 1) / tpb_x;
-  int ngrid_y =  (ny_g + tpb_y - 1) / tpb_y;
-  int ngrid_z =  (nz_g + tpb_z - 1) / tpb_z;
+  int tpb_x   = 8;
+  int tpb_y   = 8;
+  int tpb_z   = 8;
+  int ngrid_x = (nx_g + tpb_x - 1) / tpb_x;
+  int ngrid_y = (ny_g + tpb_y - 1) / tpb_y;
+  int ngrid_z = (nz_g + tpb_z - 1) / tpb_z;
   // number of blocks per 1D grid
   dim3 dim3dGrid(ngrid_x, ngrid_y, ngrid_z);
   //  number of threads per 1D block
   dim3 dim3dBlock(tpb_x, tpb_y, tpb_z);
 
-
-  hipLaunchKernelGGL(Get_Gravity_Field_Particles_Kernel, dim3dGrid, dim3dBlock, 0, 0,  potential_dev, gravity_x_dev, gravity_y_dev, gravity_z_dev, nx_local, ny_local, nz_local, n_ghost_particles_grid, N_GHOST_POTENTIAL, dx, dy, dz );
-  CudaCheckError();
+  hipLaunchKernelGGL(Get_Gravity_Field_Particles_Kernel, dim3dGrid, dim3dBlock, 0, 0, potential_dev, gravity_x_dev,
+                     gravity_y_dev, gravity_z_dev, nx_local, ny_local, nz_local, n_ghost_particles_grid,
+                     N_GHOST_POTENTIAL, dx, dy, dz);
+  GPU_Error_Check();
 }
 
-
-//Get CIC indexes from the particles positions
-__device__ void Get_Indexes_CIC_Gravity( Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, Real pos_x, Real pos_y, Real pos_z, int &indx_x, int &indx_y, int &indx_z ){
-  indx_x = (int) floor( ( pos_x - xMin - 0.5*dx ) / dx );
-  indx_y = (int) floor( ( pos_y - yMin - 0.5*dy ) / dy );
-  indx_z = (int) floor( ( pos_z - zMin - 0.5*dz ) / dz );
+// Get CIC indexes from the particles positions
+__device__ void Get_Indexes_CIC_Gravity(Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, Real pos_x,
+                                        Real pos_y, Real pos_z, int &indx_x, int &indx_y, int &indx_z)
+{
+  indx_x = (int)floor((pos_x - xMin - 0.5 * dx) / dx);
+  indx_y = (int)floor((pos_y - yMin - 0.5 * dy) / dy);
+  indx_z = (int)floor((pos_z - zMin - 0.5 * dz) / dz);
 }
 
-//Kernel to compute the gravitational field at the particles positions via Cloud-In-Cell
-__global__ void Get_Gravity_CIC_Kernel( part_int_t n_local, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev,  Real xMin, Real yMin, Real zMin, Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, int nx, int ny, int nz, int n_ghost  ){
-
-  part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x ;
-
-  if ( tid >= n_local) return;
+// Kernel to compute the gravitational field at the particles positions via
+// Cloud-In-Cell
+__global__ void Get_Gravity_CIC_Kernel(part_int_t n_local, Real *gravity_x_dev, Real *gravity_y_dev,
+                                       Real *gravity_z_dev, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev,
+                                       Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real xMin, Real yMin,
+                                       Real zMin, Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, int nx,
+                                       int ny, int nz, int n_ghost)
+{
+  part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x;
+
+  if (tid >= n_local) {
+    return;
+  }
 
   int nx_g, ny_g;
-  nx_g = nx + 2*n_ghost;
-  ny_g = ny + 2*n_ghost;
+  nx_g = nx + 2 * n_ghost;
+  ny_g = ny + 2 * n_ghost;
 
   Real pos_x, pos_y, pos_z;
   Real cell_center_x, cell_center_y, cell_center_z;
@@ -159,97 +174,106 @@ __global__ void Get_Gravity_CIC_Kernel( part_int_t n_local, Real *gravity_x_dev,
   pos_y = pos_y_dev[tid];
   pos_z = pos_z_dev[tid];
 
-
   int indx_x, indx_y, indx_z, indx;
-  Get_Indexes_CIC_Gravity( xMin, yMin, zMin, dx, dy, dz, pos_x, pos_y, pos_z, indx_x, indx_y, indx_z );
+  Get_Indexes_CIC_Gravity(xMin, yMin, zMin, dx, dy, dz, pos_x, pos_y, pos_z, indx_x, indx_y, indx_z);
 
   bool in_local = true;
 
-  if ( pos_x < xMin || pos_x >= xMax ) in_local = false;
-  if ( pos_y < yMin || pos_y >= yMax ) in_local = false;
-  if ( pos_z < zMin || pos_z >= zMax ) in_local = false;
-  if ( ! in_local  ) {
+  if (pos_x < xMin || pos_x >= xMax) {
+    in_local = false;
+  }
+  if (pos_y < yMin || pos_y >= yMax) {
+    in_local = false;
+  }
+  if (pos_z < zMin || pos_z >= zMax) {
+    in_local = false;
+  }
+  if (!in_local) {
     printf(" Gravity CIC Error: Particle outside local domain");
     return;
   }
 
-  cell_center_x = xMin + indx_x*dx + 0.5*dx;
-  cell_center_y = yMin + indx_y*dy + 0.5*dy;
-  cell_center_z = zMin + indx_z*dz + 0.5*dz;
-  delta_x = 1 - ( pos_x - cell_center_x ) / dx;
-  delta_y = 1 - ( pos_y - cell_center_y ) / dy;
-  delta_z = 1 - ( pos_z - cell_center_z ) / dz;
+  cell_center_x = xMin + indx_x * dx + 0.5 * dx;
+  cell_center_y = yMin + indx_y * dy + 0.5 * dy;
+  cell_center_z = zMin + indx_z * dz + 0.5 * dz;
+  delta_x       = 1 - (pos_x - cell_center_x) / dx;
+  delta_y       = 1 - (pos_y - cell_center_y) / dy;
+  delta_z       = 1 - (pos_z - cell_center_z) / dz;
   indx_x += n_ghost;
   indx_y += n_ghost;
   indx_z += n_ghost;
 
-  indx = indx_x + indx_y*nx_g + indx_z*nx_g*ny_g;
+  indx   = indx_x + indx_y * nx_g + indx_z * nx_g * ny_g;
   g_x_bl = gravity_x_dev[indx];
   g_y_bl = gravity_y_dev[indx];
   g_z_bl = gravity_z_dev[indx];
 
-  indx = (indx_x+1) + (indx_y)*nx_g + (indx_z)*nx_g*ny_g;
+  indx   = (indx_x + 1) + (indx_y)*nx_g + (indx_z)*nx_g * ny_g;
   g_x_br = gravity_x_dev[indx];
   g_y_br = gravity_y_dev[indx];
   g_z_br = gravity_z_dev[indx];
 
-  indx = (indx_x) + (indx_y+1)*nx_g + (indx_z)*nx_g*ny_g;
+  indx   = (indx_x) + (indx_y + 1) * nx_g + (indx_z)*nx_g * ny_g;
   g_x_bu = gravity_x_dev[indx];
   g_y_bu = gravity_y_dev[indx];
   g_z_bu = gravity_z_dev[indx];
 
-  indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z)*nx_g*ny_g;
+  indx    = (indx_x + 1) + (indx_y + 1) * nx_g + (indx_z)*nx_g * ny_g;
   g_x_bru = gravity_x_dev[indx];
   g_y_bru = gravity_y_dev[indx];
   g_z_bru = gravity_z_dev[indx];
 
-  indx = (indx_x) + (indx_y)*nx_g + (indx_z+1)*nx_g*ny_g;
+  indx   = (indx_x) + (indx_y)*nx_g + (indx_z + 1) * nx_g * ny_g;
   g_x_tl = gravity_x_dev[indx];
   g_y_tl = gravity_y_dev[indx];
   g_z_tl = gravity_z_dev[indx];
 
-  indx = (indx_x+1) + (indx_y)*nx_g + (indx_z+1)*nx_g*ny_g;
+  indx   = (indx_x + 1) + (indx_y)*nx_g + (indx_z + 1) * nx_g * ny_g;
   g_x_tr = gravity_x_dev[indx];
   g_y_tr = gravity_y_dev[indx];
   g_z_tr = gravity_z_dev[indx];
 
-  indx = (indx_x) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g;
+  indx   = (indx_x) + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g;
   g_x_tu = gravity_x_dev[indx];
   g_y_tu = gravity_y_dev[indx];
   g_z_tu = gravity_z_dev[indx];
 
-  indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g;
+  indx    = (indx_x + 1) + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g;
   g_x_tru = gravity_x_dev[indx];
   g_y_tru = gravity_y_dev[indx];
   g_z_tru = gravity_z_dev[indx];
 
-  g_x = g_x_bl*(delta_x)*(delta_y)*(delta_z)     + g_x_br*(1-delta_x)*(delta_y)*(delta_z) +
-        g_x_bu*(delta_x)*(1-delta_y)*(delta_z  ) + g_x_bru*(1-delta_x)*(1-delta_y)*(delta_z) +
-        g_x_tl*(delta_x)*(delta_y)*(1-delta_z)   + g_x_tr*(1-delta_x)*(delta_y)*(1-delta_z) +
-        g_x_tu*(delta_x)*(1-delta_y)*(1-delta_z) + g_x_tru*(1-delta_x)*(1-delta_y)*(1-delta_z);
+  g_x = g_x_bl * (delta_x) * (delta_y) * (delta_z) + g_x_br * (1 - delta_x) * (delta_y) * (delta_z) +
+        g_x_bu * (delta_x) * (1 - delta_y) * (delta_z) + g_x_bru * (1 - delta_x) * (1 - delta_y) * (delta_z) +
+        g_x_tl * (delta_x) * (delta_y) * (1 - delta_z) + g_x_tr * (1 - delta_x) * (delta_y) * (1 - delta_z) +
+        g_x_tu * (delta_x) * (1 - delta_y) * (1 - delta_z) + g_x_tru * (1 - delta_x) * (1 - delta_y) * (1 - delta_z);
 
-  g_y = g_y_bl*(delta_x)*(delta_y)*(delta_z)     + g_y_br*(1-delta_x)*(delta_y)*(delta_z) +
-        g_y_bu*(delta_x)*(1-delta_y)*(delta_z)   + g_y_bru*(1-delta_x)*(1-delta_y)*(delta_z) +
-        g_y_tl*(delta_x)*(delta_y)*(1-delta_z)   + g_y_tr*(1-delta_x)*(delta_y)*(1-delta_z) +
-        g_y_tu*(delta_x)*(1-delta_y)*(1-delta_z) + g_y_tru*(1-delta_x)*(1-delta_y)*(1-delta_z);
+  g_y = g_y_bl * (delta_x) * (delta_y) * (delta_z) + g_y_br * (1 - delta_x) * (delta_y) * (delta_z) +
+        g_y_bu * (delta_x) * (1 - delta_y) * (delta_z) + g_y_bru * (1 - delta_x) * (1 - delta_y) * (delta_z) +
+        g_y_tl * (delta_x) * (delta_y) * (1 - delta_z) + g_y_tr * (1 - delta_x) * (delta_y) * (1 - delta_z) +
+        g_y_tu * (delta_x) * (1 - delta_y) * (1 - delta_z) + g_y_tru * (1 - delta_x) * (1 - delta_y) * (1 - delta_z);
 
-  g_z = g_z_bl*(delta_x)*(delta_y)*(delta_z)     + g_z_br*(1-delta_x)*(delta_y)*(delta_z) +
-        g_z_bu*(delta_x)*(1-delta_y)*(delta_z)   + g_z_bru*(1-delta_x)*(1-delta_y)*(delta_z) +
-        g_z_tl*(delta_x)*(delta_y)*(1-delta_z)   + g_z_tr*(1-delta_x)*(delta_y)*(1-delta_z) +
-        g_z_tu*(delta_x)*(1-delta_y)*(1-delta_z) + g_z_tru*(1-delta_x)*(1-delta_y)*(1-delta_z);
+  g_z = g_z_bl * (delta_x) * (delta_y) * (delta_z) + g_z_br * (1 - delta_x) * (delta_y) * (delta_z) +
+        g_z_bu * (delta_x) * (1 - delta_y) * (delta_z) + g_z_bru * (1 - delta_x) * (1 - delta_y) * (delta_z) +
+        g_z_tl * (delta_x) * (delta_y) * (1 - delta_z) + g_z_tr * (1 - delta_x) * (delta_y) * (1 - delta_z) +
+        g_z_tu * (delta_x) * (1 - delta_y) * (1 - delta_z) + g_z_tru * (1 - delta_x) * (1 - delta_y) * (1 - delta_z);
 
   grav_x_dev[tid] = g_x;
   grav_y_dev[tid] = g_y;
   grav_z_dev[tid] = g_z;
-
 }
 
-
-//Call the kernel to compote the gravitational field at the particles positions ( CIC )
-void Particles_3D::Get_Gravity_CIC_GPU_function( part_int_t n_local, int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, Real zMin,  Real zMax, Real dx, Real dy, Real dz,   Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev,  Real *grav_y_dev,  Real *grav_z_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev ){
-
+// Call the kernel to compote the gravitational field at the particles positions
+// ( CIC )
+void Particles3D::Get_Gravity_CIC_GPU_function(part_int_t n_local, int nx_local, int ny_local, int nz_local,
+                                               int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax,
+                                               Real zMin, Real zMax, Real dx, Real dy, Real dz, Real *pos_x_dev,
+                                               Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev,
+                                               Real *grav_z_dev, Real *gravity_x_dev, Real *gravity_y_dev,
+                                               Real *gravity_z_dev)
+{
   // set values for GPU kernels
-  int ngrid =  (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES;
+  int ngrid = (n_local - 1) / TPB_PARTICLES + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(ngrid, 1, 1);
   //  number of threads per 1D block
@@ -257,45 +281,47 @@ void Particles_3D::Get_Gravity_CIC_GPU_function( part_int_t n_local, int nx_loca
 
   // Only runs if there are local particles
   if (n_local > 0) {
-    hipLaunchKernelGGL(Get_Gravity_CIC_Kernel, dim1dGrid, dim1dBlock, 0, 0,  n_local, gravity_x_dev, gravity_y_dev, gravity_z_dev, pos_x_dev, pos_y_dev, pos_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, xMin, yMin, zMin, xMax, yMax, zMax, dx, dy, dz, nx_local, ny_local, nz_local, n_ghost_particles_grid );
-    CudaCheckError();
+    hipLaunchKernelGGL(Get_Gravity_CIC_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, gravity_x_dev, gravity_y_dev,
+                       gravity_z_dev, pos_x_dev, pos_y_dev, pos_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, xMin, yMin,
+                       zMin, xMax, yMax, zMax, dx, dy, dz, nx_local, ny_local, nz_local, n_ghost_particles_grid);
+    GPU_Error_Check();
   }
-
 }
 
-#endif //PARTICLES_GPU
-
-#ifdef GRAVITY_GPU
+  #endif  // PARTICLES_GPU
 
-void __global__ Copy_Particles_Density_Kernel( Real *dst_density, Real *src_density, int nx_local, int ny_local, int nz_local, int n_ghost ){
+  #ifdef GRAVITY_GPU
 
+void __global__ Copy_Particles_Density_Kernel(Real *dst_density, Real *src_density, int nx_local, int ny_local,
+                                              int nz_local, int n_ghost)
+{
   int tid_x, tid_y, tid_z, tid_CIC, tid_dens;
   tid_x = blockIdx.x * blockDim.x + threadIdx.x;
   tid_y = blockIdx.y * blockDim.y + threadIdx.y;
   tid_z = blockIdx.z * blockDim.z + threadIdx.z;
 
-  if (tid_x >= nx_local || tid_y >= ny_local || tid_z >= nz_local ) return;
+  if (tid_x >= nx_local || tid_y >= ny_local || tid_z >= nz_local) {
+    return;
+  }
 
-  tid_dens = tid_x + tid_y*nx_local + tid_z*nx_local*ny_local;
+  tid_dens = tid_x + tid_y * nx_local + tid_z * nx_local * ny_local;
 
   tid_x += n_ghost;
   tid_y += n_ghost;
   tid_z += n_ghost;
 
   int nx_CIC, ny_CIC;
-  nx_CIC = nx_local + 2*n_ghost;
-  ny_CIC = ny_local + 2*n_ghost;
-  tid_CIC = tid_x + tid_y*nx_CIC + tid_z*nx_CIC*ny_CIC;
+  nx_CIC  = nx_local + 2 * n_ghost;
+  ny_CIC  = ny_local + 2 * n_ghost;
+  tid_CIC = tid_x + tid_y * nx_CIC + tid_z * nx_CIC * ny_CIC;
 
   dst_density[tid_dens] = src_density[tid_CIC];
-
 }
 
-
-
-//Copy the particles density to the density array in Grav to compute the potential
-void Grid3D::Copy_Particles_Density_GPU( ){
-
+// Copy the particles density to the density array in Grav to compute the
+// potential
+void Grid3D::Copy_Particles_Density_GPU()
+{
   int nx_local, ny_local, nz_local, n_ghost;
   n_ghost  = Particles.G.n_ghost_particles_grid;
   nx_local = Grav.nx_local;
@@ -303,9 +329,9 @@ void Grid3D::Copy_Particles_Density_GPU( ){
   nz_local = Grav.nz_local;
 
   // set values for GPU kernels
-  int tpb_x = 16;
-  int tpb_y = 8;
-  int tpb_z = 8;
+  int tpb_x   = 16;
+  int tpb_y   = 8;
+  int tpb_z   = 8;
   int ngrid_x = (nx_local - 1) / tpb_x + 1;
   int ngrid_y = (ny_local - 1) / tpb_y + 1;
   int ngrid_z = (nz_local - 1) / tpb_z + 1;
@@ -314,10 +340,10 @@ void Grid3D::Copy_Particles_Density_GPU( ){
   //  number of threads per 1D block
   dim3 dim3dBlock(tpb_x, tpb_y, tpb_z);
 
-  hipLaunchKernelGGL( Copy_Particles_Density_Kernel, dim3dGrid, dim3dBlock, 0, 0, Grav.F.density_d, Particles.G.density_dev, nx_local, ny_local, nz_local, n_ghost );
+  hipLaunchKernelGGL(Copy_Particles_Density_Kernel, dim3dGrid, dim3dBlock, 0, 0, Grav.F.density_d,
+                     Particles.G.density_dev, nx_local, ny_local, nz_local, n_ghost);
 }
 
+  #endif  // GRAVITY_GPU
 
-#endif//GRAVITY_GPU
-
-#endif//PARTICLES
+#endif  // PARTICLES
diff --git a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp
index ad3ee9127..7aaa627d6 100644
--- a/src/particles/io_particles.cpp
+++ b/src/particles/io_particles.cpp
@@ -1,188 +1,181 @@
 #ifdef PARTICLES
-#include <unistd.h>
-#include <iostream>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-#include "../global/global.h"
-#include "../grid/grid3D.h"
-#include "../io/io.h"
-#include "../particles/particles_3D.h"
-
-#ifdef HDF5
-#include <hdf5.h>
-#endif
-#ifdef MPI_CHOLLA
-#include "../mpi/mpi_routines.h"
-#endif
+  #include <stdarg.h>
+  #include <stdio.h>
+  #include <stdlib.h>
+  #include <string.h>
+  #include <unistd.h>
 
-// #define OUTPUT_PARTICLES_DATA
+  #include <iostream>
+  #include <string>
 
+  #include "../global/global.h"
+  #include "../grid/grid3D.h"
+  #include "../io/io.h"
+  #include "particles_3D.h"
 
-void Particles_3D::Load_Particles_Data( struct parameters *P){
-  char filename[100];
-  char timestep[20];
-  int nfile = P->nfile; //output step you want to read from
-  char filename_counter[100];
-  // create the filename to read from
+  #ifdef HDF5
+    #include <hdf5.h>
+  #endif
+  #ifdef MPI_CHOLLA
+    #include "../mpi/mpi_routines.h"
+  #endif
 
-  strcpy(filename, P->indir);
-  sprintf(timestep, "%d_particles", nfile);
-  strcat(filename,timestep);
+// #define OUTPUT_PARTICLES_DATA
 
-  #if defined BINARY
+void Particles3D::Load_Particles_Data(struct Parameters *P)
+{
+  #ifndef HDF5
   chprintf("\nERROR: Particles only support HDF5 outputs\n");
   exit(-1);
-  #elif defined HDF5
-  strcat(filename,".h5");
   #endif
 
+  // construct the filename to read from
   #ifdef MPI_CHOLLA
-  #ifdef TILED_INITIAL_CONDITIONS
-  sprintf(filename,"%sics_%dMpc_%d_particles.h5", P->indir, (int) P->tile_length/1000, G.nx_local); //Everyone reads the same file
-  #else
-  if (strcmp(P->init, "Disk_3D_particles") != 0) sprintf(filename,"%s.%d",filename,procID);
-  #endif //TILED_INITIAL_CONDITIONS
+    #ifdef TILED_INITIAL_CONDITIONS
+  // Every process reads the same file
+  const std::string base_fname =
+      ("ics_" + std::to_string((int)P->tile_length / 1000) + "Mpc_" + std::to_string(G.nx_local) + "_particles.h5");
+    #else
+  const int nfile              = P->nfile;  // output step you want to read from
+  const std::string base_fname = (std::to_string(nfile) + "_particles.h5." + std::to_string(procID));
+    #endif  // TILED_INITIAL_CONDITIONS
   #endif
 
-  chprintf(" Loading particles file: %s \n", filename );
+  const std::string filename = std::string(P->indir) + base_fname;
+
+  chprintf(" Loading particles file: %s \n", filename.c_str());
 
   #ifdef HDF5
-  hid_t  file_id;
-  herr_t  status;
+  hid_t file_id;
+  herr_t status;
 
   // open the file
-  file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT);
+  file_id = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
   if (file_id < 0) {
     printf("Unable to open input file.\n");
     exit(0);
   }
 
-  Load_Particles_Data_HDF5(file_id, nfile, P );
+  Load_Particles_Data_HDF5(file_id, nfile, P);
 
   #endif
 }
 
-
-void Grid3D::WriteData_Particles( struct parameters P, int nfile)
+void Grid3D::WriteData_Particles(struct Parameters P, int nfile)
 {
   // Write the particles data to file
-  OutputData_Particles( P, nfile);
+  OutputData_Particles(P, nfile);
 }
 
+  #ifdef HDF5
 
-#ifdef HDF5
-
-void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct parameters *P  )
+void Particles3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct Parameters *P)
 {
   int i, j, k, id, buf_id;
-  hid_t     attribute_id, dataset_id;
-  Real      *dataset_buffer_px;
-  Real      *dataset_buffer_py;
-  Real      *dataset_buffer_pz;
-  Real      *dataset_buffer_vx;
-  Real      *dataset_buffer_vy;
-  Real      *dataset_buffer_vz;
-  Real      *dataset_buffer_m;
-  #ifdef PARTICLE_AGE
-  Real      *dataset_buffer_age;
-  #endif
-  herr_t    status;
+  hid_t attribute_id, dataset_id;
+  Real *dataset_buffer_px;
+  Real *dataset_buffer_py;
+  Real *dataset_buffer_pz;
+  Real *dataset_buffer_vx;
+  Real *dataset_buffer_vy;
+  Real *dataset_buffer_vz;
+  Real *dataset_buffer_m;
+    #ifdef PARTICLE_AGE
+  Real *dataset_buffer_age;
+    #endif
+  herr_t status;
 
   part_int_t n_to_load, pIndx;
 
   attribute_id = H5Aopen(file_id, "n_particles_local", H5P_DEFAULT);
-  status = H5Aread(attribute_id, H5T_NATIVE_LONG, &n_to_load);
-  status = H5Aclose(attribute_id);
+  status       = H5Aread(attribute_id, H5T_NATIVE_LONG, &n_to_load);
+  status       = H5Aclose(attribute_id);
 
-  #ifdef COSMOLOGY
+    #ifdef COSMOLOGY
   attribute_id = H5Aopen(file_id, "current_z", H5P_DEFAULT);
-  status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &current_z);
-  status = H5Aclose(attribute_id);
+  status       = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &current_z);
+  status       = H5Aclose(attribute_id);
 
   attribute_id = H5Aopen(file_id, "current_a", H5P_DEFAULT);
-  status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &current_a);
-  status = H5Aclose(attribute_id);
-  #endif
+  status       = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &current_a);
+  status       = H5Aclose(attribute_id);
+    #endif
 
-  #ifdef SINGLE_PARTICLE_MASS
+    #ifdef SINGLE_PARTICLE_MASS
   attribute_id = H5Aopen(file_id, "particle_mass", H5P_DEFAULT);
-  status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &particle_mass);
-  status = H5Aclose(attribute_id);
-  chprintf( " Using Single mass for DM particles: %f  Msun/h\n", particle_mass);
-  #endif
+  status       = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &particle_mass);
+  status       = H5Aclose(attribute_id);
+  chprintf(" Using Single mass for DM particles: %f  Msun/h\n", particle_mass);
+    #endif
 
-  #ifndef MPI_CHOLLA
+    #ifndef MPI_CHOLLA
   chprintf(" Loading %ld particles\n", n_to_load);
-  #else
-  if (strcmp(P->init, "Disk_3D_particles") != 0) {
-    part_int_t n_total_load;
-    n_total_load = ReducePartIntSum( n_to_load );
-    chprintf( " Total Particles To Load: %ld\n", n_total_load );
-  }
+    #else
+  part_int_t n_total_load;
+  n_total_load = ReducePartIntSum(n_to_load);
+  chprintf(" Total Particles To Load: %ld\n", n_total_load);
   // Print individual n_to_load
   // for ( int i=0; i<nproc; i++ ){
-  //   if ( procID == i ) std::cout << "  [pId:"  << procID << "]  Loading Particles: " << n_local <<  std::endl;
-  //   MPI_Barrier(world);
+  //   if ( procID == i ) std::cout << "  [pId:"  << procID << "]  Loading
+  //   Particles: " << n_local <<  std::endl; MPI_Barrier(world);
   // }
   MPI_Barrier(world);
-  #endif
-
+    #endif
 
-  dataset_buffer_px = (Real *) malloc(n_to_load*sizeof(Real));
-  dataset_id = H5Dopen(file_id, "/pos_x", H5P_DEFAULT);
-  status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_px);
-  status = H5Dclose(dataset_id);
+  dataset_buffer_px = (Real *)malloc(n_to_load * sizeof(Real));
+  dataset_id        = H5Dopen(file_id, "/pos_x", H5P_DEFAULT);
+  status            = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_px);
+  status            = H5Dclose(dataset_id);
 
-  dataset_buffer_py = (Real *) malloc(n_to_load*sizeof(Real));
-  dataset_id = H5Dopen(file_id, "/pos_y", H5P_DEFAULT);
-  status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_py);
-  status = H5Dclose(dataset_id);
+  dataset_buffer_py = (Real *)malloc(n_to_load * sizeof(Real));
+  dataset_id        = H5Dopen(file_id, "/pos_y", H5P_DEFAULT);
+  status            = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_py);
+  status            = H5Dclose(dataset_id);
 
-  dataset_buffer_pz = (Real *) malloc(n_to_load*sizeof(Real));
-  dataset_id = H5Dopen(file_id, "/pos_z", H5P_DEFAULT);
-  status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_pz);
-  status = H5Dclose(dataset_id);
+  dataset_buffer_pz = (Real *)malloc(n_to_load * sizeof(Real));
+  dataset_id        = H5Dopen(file_id, "/pos_z", H5P_DEFAULT);
+  status            = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_pz);
+  status            = H5Dclose(dataset_id);
 
-  dataset_buffer_vx = (Real *) malloc(n_to_load*sizeof(Real));
-  dataset_id = H5Dopen(file_id, "/vel_x", H5P_DEFAULT);
-  status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_vx);
-  status = H5Dclose(dataset_id);
+  dataset_buffer_vx = (Real *)malloc(n_to_load * sizeof(Real));
+  dataset_id        = H5Dopen(file_id, "/vel_x", H5P_DEFAULT);
+  status            = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_vx);
+  status            = H5Dclose(dataset_id);
 
-  dataset_buffer_vy = (Real *) malloc(n_to_load*sizeof(Real));
-  dataset_id = H5Dopen(file_id, "/vel_y", H5P_DEFAULT);
-  status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_vy);
-  status = H5Dclose(dataset_id);
+  dataset_buffer_vy = (Real *)malloc(n_to_load * sizeof(Real));
+  dataset_id        = H5Dopen(file_id, "/vel_y", H5P_DEFAULT);
+  status            = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_vy);
+  status            = H5Dclose(dataset_id);
 
-  dataset_buffer_vz = (Real *) malloc(n_to_load*sizeof(Real));
-  dataset_id = H5Dopen(file_id, "/vel_z", H5P_DEFAULT);
-  status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_vz);
-  status = H5Dclose(dataset_id);
+  dataset_buffer_vz = (Real *)malloc(n_to_load * sizeof(Real));
+  dataset_id        = H5Dopen(file_id, "/vel_z", H5P_DEFAULT);
+  status            = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_vz);
+  status            = H5Dclose(dataset_id);
 
-  #ifndef SINGLE_PARTICLE_MASS
-  dataset_buffer_m = (Real *) malloc(n_to_load*sizeof(Real));
-  dataset_id = H5Dopen(file_id, "/mass", H5P_DEFAULT);
-  status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_m);
-  status = H5Dclose(dataset_id);
-  #endif
+    #ifndef SINGLE_PARTICLE_MASS
+  dataset_buffer_m = (Real *)malloc(n_to_load * sizeof(Real));
+  dataset_id       = H5Dopen(file_id, "/mass", H5P_DEFAULT);
+  status           = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_m);
+  status           = H5Dclose(dataset_id);
+    #endif
 
-  #ifdef PARTICLE_IDS
+    #ifdef PARTICLE_IDS
   part_int_t *dataset_buffer_IDs;
-  dataset_buffer_IDs = (part_int_t *) malloc(n_to_load*sizeof(part_int_t));
-  dataset_id = H5Dopen(file_id, "/particle_IDs", H5P_DEFAULT);
-  status = H5Dread(dataset_id, H5T_NATIVE_LONG, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_IDs);
-  status = H5Dclose(dataset_id);
-  #endif
+  dataset_buffer_IDs = (part_int_t *)malloc(n_to_load * sizeof(part_int_t));
+  dataset_id         = H5Dopen(file_id, "/particle_IDs", H5P_DEFAULT);
+  status             = H5Dread(dataset_id, H5T_NATIVE_LONG, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_IDs);
+  status             = H5Dclose(dataset_id);
+    #endif
 
-  #ifdef PARTICLE_AGE
-  dataset_buffer_age = (Real *) malloc(n_to_load*sizeof(Real));
-  dataset_id = H5Dopen(file_id, "/age", H5P_DEFAULT);
-  status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_age);
-  status = H5Dclose(dataset_id);
-  #endif
+    #ifdef PARTICLE_AGE
+  dataset_buffer_age = (Real *)malloc(n_to_load * sizeof(Real));
+  dataset_id         = H5Dopen(file_id, "/age", H5P_DEFAULT);
+  status             = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_age);
+  status             = H5Dclose(dataset_id);
+    #endif
 
-  //Initialize min and max values for position and velocity to print initial Statistics
+  // Initialize min and max values for position and velocity to print initial
+  // Statistics
   Real px_min, px_max;
   Real py_min, py_max;
   Real pz_min, pz_max;
@@ -205,15 +198,16 @@ void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct par
   // Real values for loading each particle data
   Real pPos_x, pPos_y, pPos_z;
   Real pVel_x, pVel_y, pVel_z, pMass;
-  #ifdef PARTICLE_AGE
+    #ifdef PARTICLE_AGE
   Real pAge;
-  #endif
+    #endif
 
   part_int_t pID;
   bool in_local;
 
-  //When using Tiled Initial Conditions the Positions have to be reescaled to the global box
-  #ifdef TILED_INITIAL_CONDITIONS
+    // When using Tiled Initial Conditions the Positions have to be reescaled to
+    // the global box
+    #ifdef TILED_INITIAL_CONDITIONS
 
   Real Lx_local = G.xMax - G.xMin;
   Real Ly_local = G.yMax - G.yMin;
@@ -222,24 +216,24 @@ void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct par
   Real tile_length = P->tile_length;
   // Rescale the particles position to the global domain
   chprintf(" Rescaling the Tiled Particles Positions... \n");
-  chprintf("  Tile length:  %f   kpc/h \n", tile_length );
-  chprintf("  N_Procs  Z: %d    Y: %d    X: %d  \n", nproc_z, nproc_y, nproc_x );
+  chprintf("  Tile length:  %f   kpc/h \n", tile_length);
+  chprintf("  N_Procs  Z: %d    Y: %d    X: %d  \n", nproc_z, nproc_y, nproc_x);
 
   bool tile_length_difference = false;
-  if ( fabs( Lx_local - tile_length ) / Lx_local > 1e-2  ) tile_length_difference = true;
-  if ( fabs( Ly_local - tile_length ) / Ly_local > 1e-2  ) tile_length_difference = true;
-  if ( fabs( Lz_local - tile_length ) / Lz_local > 1e-2  ) tile_length_difference = true;
+  if (fabs(Lx_local - tile_length) / Lx_local > 1e-2) tile_length_difference = true;
+  if (fabs(Ly_local - tile_length) / Ly_local > 1e-2) tile_length_difference = true;
+  if (fabs(Lz_local - tile_length) / Lz_local > 1e-2) tile_length_difference = true;
 
-  if ( tile_length_difference ){
+  if (tile_length_difference) {
     std::cout << "  WARNING: Local Domain Length Different to Tile Length " << std::endl;
-    printf("   Domain Length:  [ %f  %f  %f  ]\n", Lz_local, Ly_local, Lx_local );
-    printf("   Tile Length:  %f \n", tile_length );
+    printf("   Domain Length:  [ %f  %f  %f  ]\n", Lz_local, Ly_local, Lx_local);
+    printf("   Tile Length:  %f \n", tile_length);
   }
 
-  #endif
+    #endif
 
-  //Loop over to input buffers and load each particle
-  for( pIndx=0; pIndx<n_to_load; pIndx++ ){
+  // Loop over to input buffers and load each particle
+  for (pIndx = 0; pIndx < n_to_load; pIndx++) {
     pPos_x = dataset_buffer_px[pIndx];
     pPos_y = dataset_buffer_py[pIndx];
     pPos_z = dataset_buffer_pz[pIndx];
@@ -262,137 +256,188 @@ void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct par
     pPos_x += G.xMin;
     pPos_y += G.yMin;
     pPos_z += G.zMin;
-    #ifdef PARTICLES_GPU
-    //If PARTICLES_GPU: The positions are copied directly from the buffers so the positions are changed in the buffer
+      #ifdef PARTICLES_GPU
+    // If PARTICLES_GPU: The positions are copied directly from the buffers so
+    // the positions are changed in the buffer
     dataset_buffer_px[pIndx] = pPos_x;
     dataset_buffer_py[pIndx] = pPos_y;
     dataset_buffer_pz[pIndx] = pPos_z;
-    #endif //PARTICLES_GPU
-    #endif //TILED_INITIAL_CONDITIONS
+      #endif  // PARTICLES_GPU
+    #endif    // TILED_INITIAL_CONDITIONS
 
-    //Make sure the partilecles to load are in the local domain
+    // Make sure the partilecles to load are in the local domain
     in_local = true;
-    if ( pPos_x < G.domainMin_x || pPos_x > G.domainMax_x ){
+    if (pPos_x < G.domainMin_x || pPos_x > G.domainMax_x) {
       std::cout << " Particle outside global domain " << std::endl;
     }
-    if ( pPos_y < G.domainMin_y || pPos_y > G.domainMax_y ){
+    if (pPos_y < G.domainMin_y || pPos_y > G.domainMax_y) {
       std::cout << " Particle outside global domain " << std::endl;
     }
-    if ( pPos_z < G.domainMin_z || pPos_z > G.domainMax_z ){
+    if (pPos_z < G.domainMin_z || pPos_z > G.domainMax_z) {
       std::cout << " Particle outside global domain " << std::endl;
     }
-    if ( pPos_x < G.xMin || pPos_x >= G.xMax ) in_local = false;
-    if ( pPos_y < G.yMin || pPos_y >= G.yMax ) in_local = false;
-    if ( pPos_z < G.zMin || pPos_z >= G.zMax ) in_local = false;
-    if ( ! in_local  ) {
-      #ifdef PARTICLE_IDS
+    if (pPos_x < G.xMin || pPos_x >= G.xMax) {
+      in_local = false;
+    }
+    if (pPos_y < G.yMin || pPos_y >= G.yMax) {
+      in_local = false;
+    }
+    if (pPos_z < G.zMin || pPos_z >= G.zMax) {
+      in_local = false;
+    }
+    if (!in_local) {
+    #ifdef PARTICLE_IDS
       std::cout << " Particle outside Local  domain    pID: " << pID << std::endl;
-      #else
+    #else
       std::cout << " Particle outside Local  domain " << std::endl;
-      #endif
-      std::cout << "  Domain X: " << G.xMin <<  "  " << G.xMax << std::endl;
-      std::cout << "  Domain Y: " << G.yMin <<  "  " << G.yMax << std::endl;
-      std::cout << "  Domain Z: " << G.zMin <<  "  " << G.zMax << std::endl;
+    #endif
+      std::cout << "  Domain X: " << G.xMin << "  " << G.xMax << std::endl;
+      std::cout << "  Domain Y: " << G.yMin << "  " << G.yMax << std::endl;
+      std::cout << "  Domain Z: " << G.zMin << "  " << G.zMax << std::endl;
       std::cout << "  Particle X: " << pPos_x << std::endl;
       std::cout << "  Particle Y: " << pPos_y << std::endl;
       std::cout << "  Particle Z: " << pPos_z << std::endl;
       continue;
     }
 
-    //Keep track of the max and min position and velocity to print Initial Statistics
-    if  ( pPos_x > px_max ) px_max = pPos_x;
-    if  ( pPos_y > py_max ) py_max = pPos_y;
-    if  ( pPos_z > pz_max ) pz_max = pPos_z;
+    // Keep track of the max and min position and velocity to print Initial
+    // Statistics
+    if (pPos_x > px_max) {
+      px_max = pPos_x;
+    }
+    if (pPos_y > py_max) {
+      py_max = pPos_y;
+    }
+    if (pPos_z > pz_max) {
+      pz_max = pPos_z;
+    }
 
-    if  ( pPos_x < px_min ) px_min = pPos_x;
-    if  ( pPos_y < py_min ) py_min = pPos_y;
-    if  ( pPos_z < pz_min ) pz_min = pPos_z;
+    if (pPos_x < px_min) {
+      px_min = pPos_x;
+    }
+    if (pPos_y < py_min) {
+      py_min = pPos_y;
+    }
+    if (pPos_z < pz_min) {
+      pz_min = pPos_z;
+    }
 
-    if  ( pVel_x > vx_max ) vx_max = pVel_x;
-    if  ( pVel_y > vy_max ) vy_max = pVel_y;
-    if  ( pVel_z > vz_max ) vz_max = pVel_z;
+    if (pVel_x > vx_max) {
+      vx_max = pVel_x;
+    }
+    if (pVel_y > vy_max) {
+      vy_max = pVel_y;
+    }
+    if (pVel_z > vz_max) {
+      vz_max = pVel_z;
+    }
 
-    if  ( pVel_x < vx_min ) vx_min = pVel_x;
-    if  ( pVel_y < vy_min ) vy_min = pVel_y;
-    if  ( pVel_z < vz_min ) vz_min = pVel_z;
+    if (pVel_x < vx_min) {
+      vx_min = pVel_x;
+    }
+    if (pVel_y < vy_min) {
+      vy_min = pVel_y;
+    }
+    if (pVel_z < vz_min) {
+      vz_min = pVel_z;
+    }
 
     #ifdef PARTICLES_CPU
-    //Add the particle data to the particles vectors
-    pos_x.push_back( pPos_x );
-    pos_y.push_back( pPos_y );
-    pos_z.push_back( pPos_z );
-    vel_x.push_back( pVel_x );
-    vel_y.push_back( pVel_y );
-    vel_z.push_back( pVel_z );
-    grav_x.push_back( 0.0 );
-    grav_y.push_back( 0.0 );
-    grav_z.push_back( 0.0 );
-    #ifndef SINGLE_PARTICLE_MASS
-    mass.push_back( pMass );
-    #endif
-    #ifdef PARTICLE_IDS
+    // Add the particle data to the particles vectors
+    pos_x.push_back(pPos_x);
+    pos_y.push_back(pPos_y);
+    pos_z.push_back(pPos_z);
+    vel_x.push_back(pVel_x);
+    vel_y.push_back(pVel_y);
+    vel_z.push_back(pVel_z);
+    grav_x.push_back(0.0);
+    grav_y.push_back(0.0);
+    grav_z.push_back(0.0);
+      #ifndef SINGLE_PARTICLE_MASS
+    mass.push_back(pMass);
+      #endif
+      #ifdef PARTICLE_IDS
     partIDs.push_back(pID);
-    #endif
-    #ifdef PARTICLE_AGE
-    age.push_back( pAge );
-    #endif
-    n_local += 1; //Add 1 to the local number of particles
-    #endif//PARTICLES_CPU
+      #endif
+      #ifdef PARTICLE_AGE
+    age.push_back(pAge);
+      #endif
+    n_local += 1;  // Add 1 to the local number of particles
+    #endif         // PARTICLES_CPU
   }
 
-  #ifdef PARTICLES_GPU
+    #ifdef PARTICLES_GPU
   // Alocate memory in GPU for particle data
   // particles_array_size = (part_int_t) n_to_load;
-  particles_array_size = Compute_Particles_GPU_Array_Size( n_to_load );
-  chprintf( " Allocating GPU buffer size: %ld * %f = %ld \n", n_to_load, G.gpu_allocation_factor, particles_array_size);
-  Allocate_Particles_GPU_Array_Real( &pos_x_dev, particles_array_size);
-  Allocate_Particles_GPU_Array_Real( &pos_y_dev, particles_array_size);
-  Allocate_Particles_GPU_Array_Real( &pos_z_dev, particles_array_size);
-  Allocate_Particles_GPU_Array_Real( &vel_x_dev, particles_array_size);
-  Allocate_Particles_GPU_Array_Real( &vel_y_dev, particles_array_size);
-  Allocate_Particles_GPU_Array_Real( &vel_z_dev, particles_array_size);
-  Allocate_Particles_GPU_Array_Real( &grav_x_dev, particles_array_size);
-  Allocate_Particles_GPU_Array_Real( &grav_y_dev, particles_array_size);
-  Allocate_Particles_GPU_Array_Real( &grav_z_dev, particles_array_size);
+  particles_array_size = Compute_Particles_GPU_Array_Size(n_to_load);
+  chprintf(" Allocating GPU buffer size: %ld * %f = %ld \n", n_to_load, G.gpu_allocation_factor, particles_array_size);
+  Allocate_Particles_GPU_Array_Real(&pos_x_dev, particles_array_size);
+  Allocate_Particles_GPU_Array_Real(&pos_y_dev, particles_array_size);
+  Allocate_Particles_GPU_Array_Real(&pos_z_dev, particles_array_size);
+  Allocate_Particles_GPU_Array_Real(&vel_x_dev, particles_array_size);
+  Allocate_Particles_GPU_Array_Real(&vel_y_dev, particles_array_size);
+  Allocate_Particles_GPU_Array_Real(&vel_z_dev, particles_array_size);
+  Allocate_Particles_GPU_Array_Real(&grav_x_dev, particles_array_size);
+  Allocate_Particles_GPU_Array_Real(&grav_y_dev, particles_array_size);
+  Allocate_Particles_GPU_Array_Real(&grav_z_dev, particles_array_size);
+      #ifndef SINGLE_PARTICLE_MASS
+  Allocate_Particles_GPU_Array_Real(&mass_dev, particles_array_size);
+      #endif
+      #ifdef PARTICLE_IDS
+  Allocate_Particles_GPU_Array_Part_Int(&partIDs_dev, particles_array_size);
+      #endif
+      #ifdef PARTICLE_AGE
+  Allocate_Particles_GPU_Array_Real(&age_dev, particles_array_size);
+      #endif
+
   n_local = n_to_load;
 
-  chprintf( " Allocated GPU memory for particle data\n");
+  chprintf(" Allocated GPU memory for particle data\n");
   // printf( " Loaded %ld  particles ", n_to_load);
 
-  //Copyt the particle data to GPU memory
-  Copy_Particles_Array_Real_Host_to_Device( dataset_buffer_px, pos_x_dev, n_local);
-  Copy_Particles_Array_Real_Host_to_Device( dataset_buffer_py, pos_y_dev, n_local);
-  Copy_Particles_Array_Real_Host_to_Device( dataset_buffer_pz, pos_z_dev, n_local);
-  Copy_Particles_Array_Real_Host_to_Device( dataset_buffer_vx, vel_x_dev, n_local);
-  Copy_Particles_Array_Real_Host_to_Device( dataset_buffer_vy, vel_y_dev, n_local);
-  Copy_Particles_Array_Real_Host_to_Device( dataset_buffer_vz, vel_z_dev, n_local);
-  #endif
+  // Copy the particle data to GPU memory
+  Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_px, pos_x_dev, n_local);
+  Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_py, pos_y_dev, n_local);
+  Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_pz, pos_z_dev, n_local);
+  Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_vx, vel_x_dev, n_local);
+  Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_vy, vel_y_dev, n_local);
+  Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_vz, vel_z_dev, n_local);
+      #ifndef SINGLE_PARTICLE_MASS
+  Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_m, mass_dev, n_local);
+      #endif
+      #ifdef PARTICLE_IDS
+  Copy_Particles_Array_Int_Host_to_Device(dataset_buffer_IDs, partIDs_dev, n_local);
+      #endif
+      #ifdef PARTICLE_AGE
+  Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_age, age_dev, n_local);
+      #endif
+    #endif  // PARTICLES_GPU
 
-  #ifndef MPI_CHOLLA
-  chprintf( " Loaded  %ld  particles\n", n_local );
-  #else
+    #ifndef MPI_CHOLLA
+  chprintf(" Loaded  %ld  particles\n", n_local);
+    #else
   MPI_Barrier(world);
   part_int_t n_total_loaded;
-  n_total_loaded = ReducePartIntSum( n_local );
+  n_total_loaded  = ReducePartIntSum(n_local);
   n_total_initial = n_total_loaded;
-  chprintf( " Total Particles Loaded: %ld\n", n_total_loaded );
-  #endif
+  chprintf(" Total Particles Loaded: %ld\n", n_total_loaded);
+    #endif
 
-  #ifdef MPI_CHOLLA
-  Real px_max_g = ReduceRealMax( px_max );
-  Real py_max_g = ReduceRealMax( py_max );
-  Real pz_max_g = ReduceRealMax( pz_max );
-  Real vx_max_g = ReduceRealMax( vx_max );
-  Real vy_max_g = ReduceRealMax( vy_max );
-  Real vz_max_g = ReduceRealMax( vz_max );
-
-  Real px_min_g = ReduceRealMin( px_min );
-  Real py_min_g = ReduceRealMin( py_min );
-  Real pz_min_g = ReduceRealMin( pz_min );
-  Real vx_min_g = ReduceRealMin( vx_min );
-  Real vy_min_g = ReduceRealMin( vy_min );
-  Real vz_min_g = ReduceRealMin( vz_min );
-  #else
+    #ifdef MPI_CHOLLA
+  Real px_max_g = ReduceRealMax(px_max);
+  Real py_max_g = ReduceRealMax(py_max);
+  Real pz_max_g = ReduceRealMax(pz_max);
+  Real vx_max_g = ReduceRealMax(vx_max);
+  Real vy_max_g = ReduceRealMax(vy_max);
+  Real vz_max_g = ReduceRealMax(vz_max);
+
+  Real px_min_g = ReduceRealMin(px_min);
+  Real py_min_g = ReduceRealMin(py_min);
+  Real pz_min_g = ReduceRealMin(pz_min);
+  Real vx_min_g = ReduceRealMin(vx_min);
+  Real vy_min_g = ReduceRealMin(vy_min);
+  Real vz_min_g = ReduceRealMin(vz_min);
+    #else
   Real px_max_g = px_max;
   Real py_max_g = py_max;
   Real pz_max_g = pz_max;
@@ -406,45 +451,45 @@ void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct par
   Real vx_min_g = vx_min;
   Real vy_min_g = vy_min;
   Real vz_min_g = vz_min;
-  #endif//MPI_CHOLLA
-
-  //Print initial Statistics
-  #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY)
-  chprintf( "  Pos X   Min: %f   Max: %f   [ kpc/h ]\n", px_min_g, px_max_g);
-  chprintf( "  Pos Y   Min: %f   Max: %f   [ kpc/h ]\n", py_min_g, py_max_g);
-  chprintf( "  Pos Z   Min: %f   Max: %f   [ kpc/h ]\n", pz_min_g, pz_max_g);
-  chprintf( "  Vel X   Min: %f   Max: %f   [ km/s ]\n", vx_min_g, vx_max_g);
-  chprintf( "  Vel Y   Min: %f   Max: %f   [ km/s ]\n", vy_min_g, vy_max_g);
-  chprintf( "  Vel Z   Min: %f   Max: %f   [ km/s ]\n", vz_min_g, vz_max_g);
-  #endif//PRINT_INITIAL_STATS
-
-  //Free the buffers to used to load the hdf5 files
+    #endif  // MPI_CHOLLA
+
+    // Print initial Statistics
+    #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY)
+  chprintf("  Pos X   Min: %f   Max: %f   [ kpc/h ]\n", px_min_g, px_max_g);
+  chprintf("  Pos Y   Min: %f   Max: %f   [ kpc/h ]\n", py_min_g, py_max_g);
+  chprintf("  Pos Z   Min: %f   Max: %f   [ kpc/h ]\n", pz_min_g, pz_max_g);
+  chprintf("  Vel X   Min: %f   Max: %f   [ km/s ]\n", vx_min_g, vx_max_g);
+  chprintf("  Vel Y   Min: %f   Max: %f   [ km/s ]\n", vy_min_g, vy_max_g);
+  chprintf("  Vel Z   Min: %f   Max: %f   [ km/s ]\n", vz_min_g, vz_max_g);
+    #endif  // PRINT_INITIAL_STATS
+
+  // Free the buffers to used to load the hdf5 files
   free(dataset_buffer_px);
   free(dataset_buffer_py);
   free(dataset_buffer_pz);
   free(dataset_buffer_vx);
   free(dataset_buffer_vy);
   free(dataset_buffer_vz);
-  #ifndef SINGLE_PARTICLE_MASS
+    #ifndef SINGLE_PARTICLE_MASS
   free(dataset_buffer_m);
-  #endif
-  #ifdef PARTICLE_IDS
+    #endif
+    #ifdef PARTICLE_IDS
   free(dataset_buffer_IDs);
-  #endif
-  #ifdef PARTICLE_AGE
+    #endif
+    #ifdef PARTICLE_AGE
   free(dataset_buffer_age);
-  #endif
+    #endif
 }
 
-
 /*! \fn void Write_Header_HDF5(hid_t file_id)
  *  \brief Write the relevant header info to the HDF5 file. */
-void Grid3D::Write_Particles_Header_HDF5( hid_t file_id){
-  hid_t     attribute_id, dataspace_id;
-  herr_t    status;
-  hsize_t   attr_dims;
-  int       int_data[3];
-  Real      Real_data[3];
+void Grid3D::Write_Particles_Header_HDF5(hid_t file_id)
+{
+  hid_t attribute_id, dataspace_id;
+  herr_t status;
+  hsize_t attr_dims;
+  int int_data[3];
+  Real Real_data[3];
 
   // Single attributes first
   attr_dims = 1;
@@ -455,292 +500,285 @@ void Grid3D::Write_Particles_Header_HDF5( hid_t file_id){
   // Write the attribute data
   status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.t);
   // Close the attribute
-  status = H5Aclose(attribute_id);
+  status       = H5Aclose(attribute_id);
   attribute_id = H5Acreate(file_id, "dt_particles", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.dt);
-  status = H5Aclose(attribute_id);
+  status       = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.dt);
+  status       = H5Aclose(attribute_id);
   attribute_id = H5Acreate(file_id, "n_particles_local", H5T_STD_I64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_ULONG, &Particles.n_local);
-  status = H5Aclose(attribute_id);
-
+  status       = H5Awrite(attribute_id, H5T_NATIVE_LONG, &Particles.n_local);
+  status       = H5Aclose(attribute_id);
 
-  #ifdef SINGLE_PARTICLE_MASS
+    #ifdef SINGLE_PARTICLE_MASS
   attribute_id = H5Acreate(file_id, "particle_mass", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.particle_mass);
-  status = H5Aclose(attribute_id);
-  #endif
-  
-  #ifdef COSMOLOGY
+  status       = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.particle_mass);
+  status       = H5Aclose(attribute_id);
+    #endif
+
+    #ifdef COSMOLOGY
   attribute_id = H5Acreate(file_id, "current_z", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.current_z);
-  status = H5Aclose(attribute_id);
-  
+  status       = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.current_z);
+  status       = H5Aclose(attribute_id);
+
   attribute_id = H5Acreate(file_id, "current_a", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.current_a);
-  status = H5Aclose(attribute_id);
-  #endif
+  status       = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.current_a);
+  status       = H5Aclose(attribute_id);
+    #endif
 
   status = H5Sclose(dataspace_id);
-
 }
 
-
-void Grid3D::Write_Particles_Data_HDF5( hid_t file_id){
+void Grid3D::Write_Particles_Data_HDF5(hid_t file_id)
+{
   part_int_t i, j, k, id, buf_id;
-  hid_t     dataset_id, dataspace_id;
-  Real      *dataset_buffer;
-  part_int_t  *dataset_buffer_IDs;
-  herr_t    status;
+  hid_t dataset_id, dataspace_id;
+  Real *dataset_buffer;
+    #ifdef PARTICLE_IDS
+  part_int_t *dataset_buffer_IDs;
+    #endif
+  herr_t status;
   part_int_t n_local = Particles.n_local;
-  hsize_t   dims[1];
-  dataset_buffer = (Real *) malloc(n_local*sizeof(Real));
+  hsize_t dims[1];
+  dataset_buffer = (Real *)malloc(n_local * sizeof(Real));
 
   bool output_particle_data;
 
-  #ifdef OUTPUT_PARTICLES_DATA
+    #ifdef OUTPUT_PARTICLES_DATA
   output_particle_data = true;
-  #else
+    #else
   output_particle_data = false;
-  #endif
-
-  #ifdef PARTICLES_GPU
-  //Copy the device arrays from the device to the host
-  CudaSafeCall( cudaMemcpy(Particles.G.density, Particles.G.density_dev, Particles.G.n_cells*sizeof(Real), cudaMemcpyDeviceToHost) );
-  #endif//PARTICLES_GPU
-  #if defined(OUTPUT_POTENTIAL) && defined(ONLY_PARTICLES) && defined(GRAVITY_GPU)
-  CudaSafeCall( cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, Grav.n_cells_potential*sizeof(Real), cudaMemcpyDeviceToHost) );
-  #endif//OUTPUT_POTENTIAL
-
+    #endif
 
+    #ifdef PARTICLES_GPU
+  // Copy the device arrays from the device to the host
+  GPU_Error_Check(cudaMemcpy(Particles.G.density, Particles.G.density_dev, Particles.G.n_cells * sizeof(Real),
+                             cudaMemcpyDeviceToHost));
+    #endif  // PARTICLES_GPU
+    #if defined(OUTPUT_POTENTIAL) && defined(ONLY_PARTICLES) && defined(GRAVITY_GPU)
+  GPU_Error_Check(cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, Grav.n_cells_potential * sizeof(Real),
+                             cudaMemcpyDeviceToHost));
+    #endif  // OUTPUT_POTENTIAL
 
   // Count Current Total Particles
   part_int_t N_particles_total;
-  #ifdef MPI_CHOLLA
-  N_particles_total = ReducePartIntSum( Particles.n_local );
-  #else
+    #ifdef MPI_CHOLLA
+  N_particles_total = ReducePartIntSum(Particles.n_local);
+    #else
   N_particles_total = Particles.n_local;
-  #endif
-
-  //Print the total particles when saving the particles data
-  chprintf( " Total Particles: %ld\n", N_particles_total );
+    #endif
 
-  //Print a warning if the number of particles has changed from the initial number of particles.
-  //This will indicate an error on the Particles transfers.
-  if ( N_particles_total != Particles.n_total_initial ) chprintf( " WARNING: Lost Particles: %d \n", Particles.n_total_initial - N_particles_total );
+  // Print the total particles when saving the particles data
+  chprintf(" Total Particles: %ld\n", N_particles_total);
 
+  // Print a warning if the number of particles has changed from the initial
+  // number of particles. This will indicate an error on the Particles
+  // transfers.
+  if (N_particles_total != Particles.n_total_initial) {
+    chprintf(" WARNING: Lost Particles: %d \n", Particles.n_total_initial - N_particles_total);
+  }
 
   // Create the data space for the datasets
-  dims[0] = n_local;
+  dims[0]      = n_local;
   dataspace_id = H5Screate_simple(1, dims, NULL);
 
-  //Copy the particles data to the hdf5_buffers and create the data_sets
+    // Copy the particles data to the hdf5_buffers and create the data_sets
 
-  // Copy the pos_x vector to the memory buffer
-  #ifdef PARTICLES_CPU
-  for ( i=0; i<n_local; i++) dataset_buffer[i] = Particles.pos_x[i];
-  #endif //PARTICLES_CPU
-  #ifdef PARTICLES_GPU
-  Particles.Copy_Particles_Array_Real_Device_to_Host( Particles.pos_x_dev, dataset_buffer, Particles.n_local );
-  #endif//PARTICLES_GPU
-  if ( output_particle_data || H.Output_Complete_Data ){
+    // Copy the pos_x vector to the memory buffer
+    #ifdef PARTICLES_CPU
+  for (i = 0; i < n_local; i++) dataset_buffer[i] = Particles.pos_x[i];
+    #endif  // PARTICLES_CPU
+    #ifdef PARTICLES_GPU
+  Particles.Copy_Particles_Array_Real_Device_to_Host(Particles.pos_x_dev, dataset_buffer, Particles.n_local);
+    #endif  // PARTICLES_GPU
+  if (output_particle_data || H.Output_Complete_Data) {
     dataset_id = H5Dcreate(file_id, "/pos_x", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    status = H5Dclose(dataset_id);
+    status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
+    status     = H5Dclose(dataset_id);
   }
 
-  // Copy the pos_y vector to the memory buffer
-  #ifdef PARTICLES_CPU
-  for ( i=0; i<n_local; i++) dataset_buffer[i] = Particles.pos_y[i];
-  #endif //PARTICLES_CPU
-  #ifdef PARTICLES_GPU
-  Particles.Copy_Particles_Array_Real_Device_to_Host( Particles.pos_y_dev, dataset_buffer, Particles.n_local );
-  #endif//PARTICLES_GPU
-  if ( output_particle_data || H.Output_Complete_Data ){
+    // Copy the pos_y vector to the memory buffer
+    #ifdef PARTICLES_CPU
+  for (i = 0; i < n_local; i++) dataset_buffer[i] = Particles.pos_y[i];
+    #endif  // PARTICLES_CPU
+    #ifdef PARTICLES_GPU
+  Particles.Copy_Particles_Array_Real_Device_to_Host(Particles.pos_y_dev, dataset_buffer, Particles.n_local);
+    #endif  // PARTICLES_GPU
+  if (output_particle_data || H.Output_Complete_Data) {
     dataset_id = H5Dcreate(file_id, "/pos_y", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    status = H5Dclose(dataset_id);
+    status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
+    status     = H5Dclose(dataset_id);
   }
 
-  // Copy the pos_z vector to the memory buffer
-  #ifdef PARTICLES_CPU
-  for ( i=0; i<n_local; i++) dataset_buffer[i] = Particles.pos_z[i];
-  #endif //PARTICLES_CPU
-  #ifdef PARTICLES_GPU
-  Particles.Copy_Particles_Array_Real_Device_to_Host( Particles.pos_z_dev, dataset_buffer, Particles.n_local );
-  #endif//PARTICLES_GPU
-  if ( output_particle_data || H.Output_Complete_Data ){
+    // Copy the pos_z vector to the memory buffer
+    #ifdef PARTICLES_CPU
+  for (i = 0; i < n_local; i++) dataset_buffer[i] = Particles.pos_z[i];
+    #endif  // PARTICLES_CPU
+    #ifdef PARTICLES_GPU
+  Particles.Copy_Particles_Array_Real_Device_to_Host(Particles.pos_z_dev, dataset_buffer, Particles.n_local);
+    #endif  // PARTICLES_GPU
+  if (output_particle_data || H.Output_Complete_Data) {
     dataset_id = H5Dcreate(file_id, "/pos_z", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    status = H5Dclose(dataset_id);
+    status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
+    status     = H5Dclose(dataset_id);
   }
 
-  // Copy the vel_x vector to the memory buffer
-  #ifdef PARTICLES_CPU
-  for ( i=0; i<n_local; i++) dataset_buffer[i] = Particles.vel_x[i];
-  #endif //PARTICLES_CPU
-  #ifdef PARTICLES_GPU
-  Particles.Copy_Particles_Array_Real_Device_to_Host( Particles.vel_x_dev, dataset_buffer, Particles.n_local );
-  #endif//PARTICLES_GPU
-  if ( output_particle_data || H.Output_Complete_Data ){
+    // Copy the vel_x vector to the memory buffer
+    #ifdef PARTICLES_CPU
+  for (i = 0; i < n_local; i++) dataset_buffer[i] = Particles.vel_x[i];
+    #endif  // PARTICLES_CPU
+    #ifdef PARTICLES_GPU
+  Particles.Copy_Particles_Array_Real_Device_to_Host(Particles.vel_x_dev, dataset_buffer, Particles.n_local);
+    #endif  // PARTICLES_GPU
+  if (output_particle_data || H.Output_Complete_Data) {
     dataset_id = H5Dcreate(file_id, "/vel_x", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    status = H5Dclose(dataset_id);
+    status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
+    status     = H5Dclose(dataset_id);
   }
 
-  // Copy the vel_y vector to the memory buffer
-  #ifdef PARTICLES_CPU
-  for ( i=0; i<n_local; i++) dataset_buffer[i] = Particles.vel_y[i];
-  #endif //PARTICLES_CPU
-  #ifdef PARTICLES_GPU
-  Particles.Copy_Particles_Array_Real_Device_to_Host( Particles.vel_y_dev, dataset_buffer, Particles.n_local );
-  #endif//PARTICLES_GPU
-  if ( output_particle_data || H.Output_Complete_Data ){
+    // Copy the vel_y vector to the memory buffer
+    #ifdef PARTICLES_CPU
+  for (i = 0; i < n_local; i++) dataset_buffer[i] = Particles.vel_y[i];
+    #endif  // PARTICLES_CPU
+    #ifdef PARTICLES_GPU
+  Particles.Copy_Particles_Array_Real_Device_to_Host(Particles.vel_y_dev, dataset_buffer, Particles.n_local);
+    #endif  // PARTICLES_GPU
+  if (output_particle_data || H.Output_Complete_Data) {
     dataset_id = H5Dcreate(file_id, "/vel_y", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    status = H5Dclose(dataset_id);
+    status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
+    status     = H5Dclose(dataset_id);
   }
 
-  // Copy the vel_z vector to the memory buffer
-  #ifdef PARTICLES_CPU
-  for ( i=0; i<n_local; i++) dataset_buffer[i] = Particles.vel_z[i];
-  #endif //PARTICLES_CPU
-  #ifdef PARTICLES_GPU
-  Particles.Copy_Particles_Array_Real_Device_to_Host( Particles.vel_z_dev, dataset_buffer, Particles.n_local );
-  #endif//PARTICLES_GPU
-  if ( output_particle_data || H.Output_Complete_Data ){
+    // Copy the vel_z vector to the memory buffer
+    #ifdef PARTICLES_CPU
+  for (i = 0; i < n_local; i++) dataset_buffer[i] = Particles.vel_z[i];
+    #endif  // PARTICLES_CPU
+    #ifdef PARTICLES_GPU
+  Particles.Copy_Particles_Array_Real_Device_to_Host(Particles.vel_z_dev, dataset_buffer, Particles.n_local);
+    #endif  // PARTICLES_GPU
+  if (output_particle_data || H.Output_Complete_Data) {
     dataset_id = H5Dcreate(file_id, "/vel_z", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-    status = H5Dclose(dataset_id);
+    status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
+    status     = H5Dclose(dataset_id);
   }
 
-  #ifndef SINGLE_PARTICLE_MASS
-  // Copy the mass vector to the memory buffer
-  #ifdef PARTICLES_CPU
-  for ( i=0; i<n_local; i++) dataset_buffer[i] = Particles.mass[i];
-  #endif //PARTICLES_CPU
-  #ifdef PARTICLES_GPU
-  Particles.Copy_Particles_Array_Real_Device_to_Host( Particles.mass_dev, dataset_buffer, Particles.n_local );
-  #endif//PARTICLES_GPU
+    #ifndef SINGLE_PARTICLE_MASS
+      // Copy the mass vector to the memory buffer
+      #ifdef PARTICLES_CPU
+  for (i = 0; i < n_local; i++) dataset_buffer[i] = Particles.mass[i];
+      #endif  // PARTICLES_CPU
+      #ifdef PARTICLES_GPU
+  Particles.Copy_Particles_Array_Real_Device_to_Host(Particles.mass_dev, dataset_buffer, Particles.n_local);
+      #endif  // PARTICLES_GPU
   dataset_id = H5Dcreate(file_id, "/mass", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-  status = H5Dclose(dataset_id);
-  #endif
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
+  status     = H5Dclose(dataset_id);
+    #endif
 
-  #ifdef PARTICLE_IDS
-  dataset_buffer_IDs = (part_int_t *) malloc(n_local*sizeof(part_int_t));
-  #ifdef PARTICLES_CPU
-  for ( i=0; i<n_local; i++) dataset_buffer_IDs[i] = Particles.partIDs[i];
-  #endif //PARTICLES_CPU
+    #ifdef PARTICLE_IDS
+  dataset_buffer_IDs = (part_int_t *)malloc(n_local * sizeof(part_int_t));
+      #ifdef PARTICLES_CPU
+  for (i = 0; i < n_local; i++) dataset_buffer_IDs[i] = Particles.partIDs[i];
+      #endif  // PARTICLES_CPU
+      #ifdef PARTICLES_GPU
+  Particles.Copy_Particles_Array_Int_Device_to_Host(Particles.partIDs_dev, dataset_buffer_IDs, Particles.n_local);
+      #endif  // PARTICLES_GPU
   dataset_id = H5Dcreate(file_id, "/particle_IDs", H5T_STD_I64LE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_LONG, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_IDs);
-  status = H5Dclose(dataset_id);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_LONG, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer_IDs);
+  status     = H5Dclose(dataset_id);
   free(dataset_buffer_IDs);
-  #endif
+    #endif
 
-  #ifdef PARTICLE_AGE
-  #ifdef PARTICLES_CPU
-  for ( i=0; i<n_local; i++) dataset_buffer[i] = Particles.age[i];
-  #endif //PARTICLES_CPU
+    #ifdef PARTICLE_AGE
+      #ifdef PARTICLES_CPU
+  for (i = 0; i < n_local; i++) dataset_buffer[i] = Particles.age[i];
+      #endif  // PARTICLES_CPU
+      #ifdef PARTICLES_GPU
+  Particles.Copy_Particles_Array_Real_Device_to_Host(Particles.age_dev, dataset_buffer, Particles.n_local);
+      #endif  // PARTICLES_GPU
   dataset_id = H5Dcreate(file_id, "/age", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-  status = H5Dclose(dataset_id);
-  #endif
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
+  status     = H5Dclose(dataset_id);
+    #endif
 
-  //Create a data set for the grid data ( density and potential )
+  // Create a data set for the grid data ( density and potential )
 
   // 3D case
-  int       nx_dset = Particles.G.nx_local;
-  int       ny_dset = Particles.G.ny_local;
-  int       nz_dset = Particles.G.nz_local;
-  hsize_t   dims3d[3];
-  dataset_buffer = (Real *) malloc(Particles.G.nz_local*Particles.G.ny_local*Particles.G.nx_local*sizeof(Real));
+  int nx_dset = Particles.G.nx_local;
+  int ny_dset = Particles.G.ny_local;
+  int nz_dset = Particles.G.nz_local;
+  hsize_t dims3d[3];
+  dataset_buffer = (Real *)malloc(Particles.G.nz_local * Particles.G.ny_local * Particles.G.nx_local * sizeof(Real));
 
   // Create the data space for the datasets
-  dims3d[0] = nx_dset;
-  dims3d[1] = ny_dset;
-  dims3d[2] = nz_dset;
+  dims3d[0]    = nx_dset;
+  dims3d[1]    = ny_dset;
+  dims3d[2]    = nz_dset;
   dataspace_id = H5Screate_simple(3, dims3d, NULL);
 
   // Copy the density array to the memory buffer
   int nGHST = Particles.G.n_ghost_particles_grid;
-  for (k=0; k<Particles.G.nz_local; k++) {
-    for (j=0; j<Particles.G.ny_local; j++) {
-      for (i=0; i<Particles.G.nx_local; i++) {
-        id = (i+nGHST) + (j+nGHST)*(Particles.G.nx_local+2*nGHST) + (k+nGHST)*(Particles.G.nx_local+2*nGHST)*(Particles.G.ny_local+2*nGHST);
-        buf_id = k + j*Particles.G.nz_local + i*Particles.G.nz_local*Particles.G.ny_local;
+  for (k = 0; k < Particles.G.nz_local; k++) {
+    for (j = 0; j < Particles.G.ny_local; j++) {
+      for (i = 0; i < Particles.G.nx_local; i++) {
+        id = (i + nGHST) + (j + nGHST) * (Particles.G.nx_local + 2 * nGHST) +
+             (k + nGHST) * (Particles.G.nx_local + 2 * nGHST) * (Particles.G.ny_local + 2 * nGHST);
+        buf_id                 = k + j * Particles.G.nz_local + i * Particles.G.nz_local * Particles.G.ny_local;
         dataset_buffer[buf_id] = Particles.G.density[id];
       }
     }
   }
 
   dataset_id = H5Dcreate(file_id, "/density", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-  status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
-  status = H5Dclose(dataset_id);
+  status     = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
+  status     = H5Dclose(dataset_id);
 
-  #if defined(OUTPUT_POTENTIAL) && defined(ONLY_PARTICLES)
+    #if defined(OUTPUT_POTENTIAL) && defined(ONLY_PARTICLES)
   // Copy the potential array to the memory buffer
-  for (k=0; k<Grav.nz_local; k++) {
-    for (j=0; j<Grav.ny_local; j++) {
-      for (i=0; i<Grav.nx_local; i++) {
-        id = (i+N_GHOST_POTENTIAL) + (j+N_GHOST_POTENTIAL)*(Grav.nx_local+2*N_GHOST_POTENTIAL) + (k+N_GHOST_POTENTIAL)*(Grav.nx_local+2*N_GHOST_POTENTIAL)*(Grav.ny_local+2*N_GHOST_POTENTIAL);
-        buf_id = k + j*Grav.nz_local + i*Grav.nz_local*Grav.ny_local;
+  for (k = 0; k < Grav.nz_local; k++) {
+    for (j = 0; j < Grav.ny_local; j++) {
+      for (i = 0; i < Grav.nx_local; i++) {
+        id =
+            (i + N_GHOST_POTENTIAL) + (j + N_GHOST_POTENTIAL) * (Grav.nx_local + 2 * N_GHOST_POTENTIAL) +
+            (k + N_GHOST_POTENTIAL) * (Grav.nx_local + 2 * N_GHOST_POTENTIAL) * (Grav.ny_local + 2 * N_GHOST_POTENTIAL);
+        buf_id                 = k + j * Grav.nz_local + i * Grav.nz_local * Grav.ny_local;
         dataset_buffer[buf_id] = Grav.F.potential_h[id];
-
       }
     }
   }
-  dataset_id = H5Dcreate(file_id, "/grav_potential", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+  dataset_id =
+      H5Dcreate(file_id, "/grav_potential", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
   status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer);
   status = H5Dclose(dataset_id);
-  #endif //OUTPUT_POTENTIAL
-
+    #endif  // OUTPUT_POTENTIAL
 
   free(dataset_buffer);
 }
-#endif//HDF5
-
+  #endif  // HDF5
 
-
-void Grid3D::OutputData_Particles( struct parameters P, int nfile)
+void Grid3D::OutputData_Particles(struct Parameters P, int nfile)
 {
   FILE *out;
-  char filename[MAXLEN];
-  char timestep[20];
+  std::string filename = FnameTemplate(P).format_fname(nfile, "_particles");
 
-  // create the filename
-  strcpy(filename, P.outdir);
-  sprintf(timestep, "%d", nfile);
-  strcat(filename,timestep);
   // a binary file is created for each process
   #if defined BINARY
-  chprintf("\nERROR: Particles only support HDF5 outputs\n")
-  return;
-  // only one HDF5 file is created
-  #elif defined HDF5
-  strcat(filename,"_particles");
-  strcat(filename,".h5");
-  #ifdef MPI_CHOLLA
-  sprintf(filename,"%s.%d",filename,procID);
-  #endif
+  chprintf("\nERROR: Particles only support HDF5 outputs\n") return;
   #endif
 
   #if defined HDF5
-  hid_t   file_id;
-  herr_t  status;
+  hid_t file_id;
+  herr_t status;
 
   // Create a new file collectively
-  file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
+  file_id = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
 
   // Write header (file attributes)
   Write_Header_HDF5(file_id);
-  Write_Particles_Header_HDF5( file_id);
-  Write_Particles_Data_HDF5( file_id);
+  Write_Particles_Header_HDF5(file_id);
+  Write_Particles_Data_HDF5(file_id);
 
   // Close the file
   status = H5Fclose(file_id);
   #endif
 }
 
-
-
 #endif
diff --git a/src/particles/particles_3D.cpp b/src/particles/particles_3D.cpp
index 13a5543ca..6417e4136 100644
--- a/src/particles/particles_3D.cpp
+++ b/src/particles/particles_3D.cpp
@@ -1,73 +1,76 @@
 #ifdef PARTICLES
 
-#include <unistd.h>
-#include <random>
-#include <cstdint>
-#include <functional>
-#include <cmath>
-#include "../io/io.h"
-#include "../grid/grid3D.h"
-#include "../utils/prng_utilities.h"
-#include "../model/disk_galaxy.h"
-#include "../particles/particles_3D.h"
-#include "../utils/error_handling.h"
-
-#ifdef MPI_CHOLLA
-#include "../mpi/mpi_routines.h"
-#endif
-
-#ifdef PARALLEL_OMP
-#include "../utils/parallel_omp.h"
-#endif
-
-Particles_3D::Particles_3D( void ):
-  TRANSFER_DENSITY_BOUNDARIES(false),
-  TRANSFER_PARTICLES_BOUNDARIES(false)
-{}
-
-void Grid3D::Initialize_Particles( struct parameters *P ){
-
-  chprintf( "\nInitializing Particles...\n");
-
-  Particles.Initialize( P, Grav, H.xbound, H.ybound, H.zbound, H.xdglobal, H.ydglobal, H.zdglobal );
-
-  #if defined (PARTICLES_GPU) && defined (GRAVITY_GPU)
-  // Set the GPU array for the particles potential equal to the Gravity GPU array for the potential
-  Particles.G.potential_dev = Grav.F.potential_d;
-  #endif
+  #include "particles_3D.h"
+
+  #include <unistd.h>
 
-  if (strcmp(P->init, "Uniform")==0)  Initialize_Uniform_Particles();
+  #include <cmath>
+  #include <cstdint>
+  #include <functional>
+  #include <random>
+
+  #include "../grid/grid3D.h"
+  #include "../io/io.h"
+  #include "../model/disk_galaxy.h"
+  #include "../utils/error_handling.h"
+  #include "../utils/prng_utilities.h"
 
   #ifdef MPI_CHOLLA
-  MPI_Barrier( world );
+    #include "../mpi/mpi_routines.h"
   #endif
-  chprintf( "Particles Initialized Successfully. \n\n");
 
+  #ifdef PARALLEL_OMP
+    #include "../utils/parallel_omp.h"
+  #endif
 
-}
+Particles3D::Particles3D(void) : TRANSFER_DENSITY_BOUNDARIES(false), TRANSFER_PARTICLES_BOUNDARIES(false) {}
 
-void Particles_3D::Initialize( struct parameters *P, Grav3D &Grav,  Real xbound, Real ybound, Real zbound, Real xdglobal, Real ydglobal, Real zdglobal){
+void Grid3D::Initialize_Particles(struct Parameters *P)
+{
+  chprintf("\nInitializing Particles...\n");
 
-  //Initialize local and total number of particles to 0
-  n_local = 0;
-  n_total = 0;
+  Particles.Initialize(P, Grav, H.xbound, H.ybound, H.zbound, H.xdglobal, H.ydglobal, H.zdglobal);
+
+  #if defined(PARTICLES_GPU) && defined(GRAVITY_GPU)
+  // Set the GPU array for the particles potential equal to the Gravity GPU
+  // array for the potential
+  Particles.G.potential_dev = Grav.F.potential_d;
+  #endif
+
+  if (strcmp(P->init, "Uniform") == 0) {
+    Initialize_Uniform_Particles();
+  }
+
+  #ifdef MPI_CHOLLA
+  MPI_Barrier(world);
+  #endif
+  chprintf("Particles Initialized Successfully. \n\n");
+}
+
+void Particles3D::Initialize(struct Parameters *P, Grav3D &Grav, Real xbound, Real ybound, Real zbound, Real xdglobal,
+                             Real ydglobal, Real zdglobal)
+{
+  // Initialize local and total number of particles to 0
+  n_local         = 0;
+  n_total         = 0;
   n_total_initial = 0;
 
-  //Initialize the simulation time and delta_t to 0
+  // Initialize the simulation time and delta_t to 0
   dt = 0.0;
-  t = 0.0;
-  //Set the maximum delta_t for particles, this can be changed depending on the problem.
+  t  = 0.0;
+  // Set the maximum delta_t for particles, this can be changed depending on the
+  // problem.
   max_dt = 10000;
 
-  //Courant CFL condition factor for particles
+  // Courant CFL condition factor for particles
   C_cfl = 0.3;
 
   #ifndef SINGLE_PARTICLE_MASS
-  particle_mass = 0; //The particle masses are stored in a separate array
+  particle_mass = 0;  // The particle masses are stored in a separate array
   #endif
 
   #ifdef PARTICLES_CPU
-  //Vectors for positions, velocities and accelerations
+  // Vectors for positions, velocities and accelerations
   real_vector_t pos_x;
   real_vector_t pos_y;
   real_vector_t pos_z;
@@ -78,32 +81,33 @@ void Particles_3D::Initialize( struct parameters *P, Grav3D &Grav,  Real xbound,
   real_vector_t grav_y;
   real_vector_t grav_z;
 
-  #ifndef SINGLE_PARTICLE_MASS
-  //Vector for masses
+    #ifndef SINGLE_PARTICLE_MASS
+  // Vector for masses
   real_vector_t mass;
-  #endif
-  #ifdef PARTICLE_IDS
-  //Vector for particle IDs
+    #endif
+    #ifdef PARTICLE_IDS
+  // Vector for particle IDs
   int_vector_t partIDs;
-  #endif
-  #ifdef PARTICLE_AGE
+    #endif
+    #ifdef PARTICLE_AGE
   real_vector_t age;
-  #endif
+    #endif
 
-  #ifdef MPI_CHOLLA
-  //Vectors for the indices of the particles that need to be transferred via MPI
+    #ifdef MPI_CHOLLA
+  // Vectors for the indices of the particles that need to be transferred via
+  // MPI
   int_vector_t out_indxs_vec_x0;
   int_vector_t out_indxs_vec_x1;
   int_vector_t out_indxs_vec_y0;
   int_vector_t out_indxs_vec_y1;
   int_vector_t out_indxs_vec_z0;
   int_vector_t out_indxs_vec_z1;
-  #endif
+    #endif
 
-  #endif //PARTICLES_CPU
+  #endif  // PARTICLES_CPU
 
-  //Initialize Grid Values
-  //Local and total number of cells
+  // Initialize Grid Values
+  // Local and total number of cells
   G.nx_local = Grav.nx_local;
   G.ny_local = Grav.ny_local;
   G.nz_local = Grav.nz_local;
@@ -111,38 +115,40 @@ void Particles_3D::Initialize( struct parameters *P, Grav3D &Grav,  Real xbound,
   G.ny_total = Grav.ny_total;
   G.nz_total = Grav.nz_total;
 
-  //Uniform (dx, dy, dz)
+  // Uniform (dx, dy, dz)
   G.dx = Grav.dx;
   G.dy = Grav.dy;
   G.dz = Grav.dz;
 
-  //Left boundaries of the local domain
+  // Left boundaries of the local domain
   G.xMin = Grav.xMin;
   G.yMin = Grav.yMin;
   G.zMin = Grav.zMin;
 
-  //Right boundaries of the local domain
+  // Right boundaries of the local domain
   G.xMax = Grav.xMax;
   G.yMax = Grav.yMax;
   G.zMax = Grav.zMax;
 
-  //Left boundaries of the global domain
+  // Left boundaries of the global domain
   G.domainMin_x = xbound;
   G.domainMin_y = ybound;
   G.domainMin_z = zbound;
 
-  //Right boundaries of the global domain
+  // Right boundaries of the global domain
   G.domainMax_x = xbound + xdglobal;
   G.domainMax_y = ybound + ydglobal;
   G.domainMax_z = zbound + zdglobal;
 
-  //Number of ghost cells for the particles grid. For CIC one ghost cell is needed
+  // Number of ghost cells for the particles grid. For CIC one ghost cell is
+  // needed
   G.n_ghost_particles_grid = 1;
 
-  //Number of cells for the particles grid including ghost cells
-  G.n_cells = (G.nx_local+2*G.n_ghost_particles_grid) * (G.ny_local+2*G.n_ghost_particles_grid) * (G.nz_local+2*G.n_ghost_particles_grid);
+  // Number of cells for the particles grid including ghost cells
+  G.n_cells = (G.nx_local + 2 * G.n_ghost_particles_grid) * (G.ny_local + 2 * G.n_ghost_particles_grid) *
+              (G.nz_local + 2 * G.n_ghost_particles_grid);
 
-  //Set the boundary types
+  // Set the boundary types
   #ifdef MPI_CHOLLA
   G.boundary_type_x0 = P->xlg_bcnd;
   G.boundary_type_x1 = P->xug_bcnd;
@@ -158,41 +164,49 @@ void Particles_3D::Initialize( struct parameters *P, Grav3D &Grav,  Real xbound,
   G.boundary_type_z0 = P->zl_bcnd;
   G.boundary_type_z1 = P->zu_bcnd;
   #endif
-    
+
   #ifdef PARTICLES_GPU
-  //Factor to allocate the particles data arrays on the GPU.
-  //When using MPI particles will be transferred to other GPU, for that reason we need extra memory allocated
-  #ifdef MPI_CHOLLA
+    // Factor to allocate the particles data arrays on the GPU.
+    // When using MPI particles will be transferred to other GPU, for that
+    // reason we need extra memory allocated
+    #ifdef MPI_CHOLLA
   G.gpu_allocation_factor = 1.25;
-  #else
+    #else
   G.gpu_allocation_factor = 1.0;
-  #endif
-
-  G.size_blocks_array = 1024*128;
-  G.n_cells_potential = ( G.nx_local + 2*N_GHOST_POTENTIAL ) * ( G.ny_local + 2*N_GHOST_POTENTIAL ) * ( G.nz_local + 2*N_GHOST_POTENTIAL );
+    #endif
 
-  #ifdef SINGLE_PARTICLE_MASS
-  mass_dev = NULL; //This array won't be used
-  #endif
+  G.size_blocks_array = 1024 * 128;
+  G.n_cells_potential = (G.nx_local + 2 * N_GHOST_POTENTIAL) * (G.ny_local + 2 * N_GHOST_POTENTIAL) *
+                        (G.nz_local + 2 * N_GHOST_POTENTIAL);
 
+    #ifdef SINGLE_PARTICLE_MASS
+  mass_dev = NULL;  // This array won't be used
+    #endif
 
-  #endif //PARTICLES_GPU
+  #endif  // PARTICLES_GPU
 
   // Flags for Initial and tranfer the particles and density
-  INITIAL = true;
-  TRANSFER_DENSITY_BOUNDARIES = false;
+  INITIAL                       = true;
+  TRANSFER_DENSITY_BOUNDARIES   = false;
   TRANSFER_PARTICLES_BOUNDARIES = false;
 
   Allocate_Memory();
 
-  //Initialize the particles density and gravitational field to 0.
+  // Initialize the particles density and gravitational field to 0.
   Initialize_Grid_Values();
 
   // Initialize Particles
-  if (strcmp(P->init, "Spherical_Overdensity_3D")==0) Initialize_Sphere(P);
-  else if (strcmp(P->init, "Zeldovich_Pancake")==0) Initialize_Zeldovich_Pancake( P );
-  else if (strcmp(P->init, "Read_Grid")==0)  Load_Particles_Data(  P );
-  else if (strcmp(P->init, "Disk_3D_particles") == 0)  Initialize_Disk_Stellar_Clusters(P);
+  if (strcmp(P->init, "Spherical_Overdensity_3D") == 0) {
+    Initialize_Sphere(P);
+  } else if (strcmp(P->init, "Zeldovich_Pancake") == 0) {
+    Initialize_Zeldovich_Pancake(P);
+  } else if (strcmp(P->init, "Read_Grid") == 0) {
+    Load_Particles_Data(P);
+  #if defined(PARTICLE_AGE) && !defined(SINGLE_PARTICLE_MASS) && defined(PARTICLE_IDS)
+  } else if (strcmp(P->init, "Disk_3D_particles") == 0) {
+    Initialize_Disk_Stellar_Clusters(P);
+  #endif
+  }
 
   #ifdef MPI_CHOLLA
   n_total_initial = ReducePartIntSum(n_local);
@@ -200,20 +214,24 @@ void Particles_3D::Initialize( struct parameters *P, Grav3D &Grav,  Real xbound,
   n_total_initial = n_local;
   #endif
 
-  chprintf("Particles Initialized: \n n_local: %lu \n", n_local );
-  chprintf(" n_total: %lu \n", n_total_initial );
-  chprintf(" xDomain_local:  [%.4f %.4f ] [%.4f %.4f ] [%.4f %.4f ]\n", G.xMin, G.xMax, G.yMin, G.yMax, G.zMin, G.zMax );
-  chprintf(" xDomain_global: [%.4f %.4f ] [%.4f %.4f ] [%.4f %.4f ]\n", G.domainMin_x, G.domainMax_x, G.domainMin_y, G.domainMax_y, G.domainMin_z, G.domainMax_z);
-  chprintf(" dx: %f  %f  %f\n", G.dx, G.dy, G.dz );
+  chprintf("Particles Initialized: \n n_local: %lu \n", n_local);
+  chprintf(" n_total: %lu \n", n_total_initial);
+  chprintf(" xDomain_local:  [%.4f %.4f ] [%.4f %.4f ] [%.4f %.4f ]\n", G.xMin, G.xMax, G.yMin, G.yMax, G.zMin, G.zMax);
+  chprintf(" xDomain_global: [%.4f %.4f ] [%.4f %.4f ] [%.4f %.4f ]\n", G.domainMin_x, G.domainMax_x, G.domainMin_y,
+           G.domainMax_y, G.domainMin_z, G.domainMax_z);
+  chprintf(" dx: %f  %f  %f\n", G.dx, G.dy, G.dz);
 
   #ifdef PARTICLE_IDS
   chprintf(" Tracking particle IDs\n");
   #endif
 
   #if defined(MPI_CHOLLA) && defined(PRINT_DOMAIN)
-  for (int n=0; n<nproc; n++){
-    if (procID == n ) std::cout << procID << " x["<< G.xMin << "," << G.xMax << "] "  << " y["<< G.yMin << "," << G.yMax << "] "  << " z["<< G.zMin << "," << G.zMax << "] " << std::endl;
-    usleep( 100 );
+  for (int n = 0; n < nproc; n++) {
+    if (procID == n)
+      std::cout << procID << " x[" << G.xMin << "," << G.xMax << "] "
+                << " y[" << G.yMin << "," << G.yMax << "] "
+                << " z[" << G.zMin << "," << G.zMax << "] " << std::endl;
+    usleep(100);
   }
   #endif
 
@@ -222,126 +240,121 @@ void Particles_3D::Initialize( struct parameters *P, Grav3D &Grav,  Real xbound,
   int n_omp_max = omp_get_max_threads();
   chprintf("  MAX OMP Threads: %d\n", n_omp_max);
   chprintf("  N OMP Threads per MPI process: %d\n", N_OMP_THREADS);
-  #ifdef PRINT_OMP_DOMAIN
-  // Print omp domain for each omp thread
-  #pragma omp parallel num_threads( N_OMP_THREADS )
+    #ifdef PRINT_OMP_DOMAIN
+      // Print omp domain for each omp thread
+      #pragma omp parallel num_threads(N_OMP_THREADS)
   {
     int omp_id, n_omp_procs;
     part_int_t omp_pIndx_start, omp_pIndx_end;
-    omp_id = omp_get_thread_num();
+    omp_id      = omp_get_thread_num();
     n_omp_procs = omp_get_num_threads();
-    #pragma omp barrier
-    Get_OMP_Particles_Indxs( n_local, n_omp_procs, omp_id, &omp_pIndx_start, &omp_pIndx_end );
+      #pragma omp barrier
+    Get_OMP_Particles_Indxs(n_local, n_omp_procs, omp_id, &omp_pIndx_start, &omp_pIndx_end);
 
-    for (int omp_indx = 0; omp_indx<n_omp_procs; omp_indx++){
-      if (omp_id == omp_indx) chprintf( "  omp_id:%d  p_start:%ld  p_end:%ld  \n", omp_id, omp_pIndx_start, omp_pIndx_end );
+    for (int omp_indx = 0; omp_indx < n_omp_procs; omp_indx++) {
+      if (omp_id == omp_indx)
+        chprintf("  omp_id:%d  p_start:%ld  p_end:%ld  \n", omp_id, omp_pIndx_start, omp_pIndx_end);
     }
   }
-  #endif//PRINT_OMP_DOMAIN
-  #endif//PARALLEL_OMP
-
+    #endif  // PRINT_OMP_DOMAIN
+  #endif    // PARALLEL_OMP
 
   #ifdef MPI_CHOLLA
-  chprintf( " N_Particles Boundaries Buffer Size: %d\n", N_PARTICLES_TRANSFER);
-  chprintf( " N_Data per Particle Transfer: %d\n", N_DATA_PER_PARTICLE_TRANSFER);
+  chprintf(" N_Particles Boundaries Buffer Size: %d\n", N_PARTICLES_TRANSFER);
+  chprintf(" N_Data per Particle Transfer: %d\n", N_DATA_PER_PARTICLE_TRANSFER);
 
-  #ifdef PARTICLES_GPU
+    #ifdef PARTICLES_GPU
   Allocate_Memory_GPU_MPI();
-  #endif//PARTICLES_GPU
-  #endif//MPI_CHOLLA
+    #endif  // PARTICLES_GPU
+  #endif    // MPI_CHOLLA
 }
 
+void Particles3D::Allocate_Memory(void)
+{
+  // Allocate arrays for density and gravitational field
 
-void Particles_3D::Allocate_Memory( void ){
-
-  //Allocate arrays for density and gravitational field
-
-  G.density   = (Real *) malloc(G.n_cells*sizeof(Real));
+  G.density = (Real *)malloc(G.n_cells * sizeof(Real));
   #ifdef PARTICLES_CPU
-  G.gravity_x = (Real *) malloc(G.n_cells*sizeof(Real));
-  G.gravity_y = (Real *) malloc(G.n_cells*sizeof(Real));
-  G.gravity_z = (Real *) malloc(G.n_cells*sizeof(Real));
-  #ifdef GRAVITY_GPU
-  // Array to copy the particles density to the device for computing the potential in the device
-  Allocate_Particles_Grid_Field_Real( &G.density_dev, G.n_cells);
-  #endif
+  G.gravity_x = (Real *)malloc(G.n_cells * sizeof(Real));
+  G.gravity_y = (Real *)malloc(G.n_cells * sizeof(Real));
+  G.gravity_z = (Real *)malloc(G.n_cells * sizeof(Real));
+    #ifdef GRAVITY_GPU
+  // Array to copy the particles density to the device for computing the
+  // potential in the device
+  Allocate_Particles_Grid_Field_Real(&G.density_dev, G.n_cells);
+    #endif
   #endif
 
   #ifdef PARTICLES_GPU
   Allocate_Memory_GPU();
-  G.dti_array_host = (Real *) malloc(G.size_blocks_array*sizeof(Real));
+  G.dti_array_host = (Real *)malloc(G.size_blocks_array * sizeof(Real));
   #endif
 }
 
-
-#ifdef PARTICLES_GPU
-void Particles_3D::Allocate_Memory_GPU(){
-
-  //Allocate arrays for density and gravitational field on the GPU
-
-  Allocate_Particles_Grid_Field_Real( &G.density_dev, G.n_cells);
-  Allocate_Particles_Grid_Field_Real( &G.gravity_x_dev, G.n_cells);
-  Allocate_Particles_Grid_Field_Real( &G.gravity_y_dev, G.n_cells);
-  Allocate_Particles_Grid_Field_Real( &G.gravity_z_dev, G.n_cells);
-  Allocate_Particles_Grid_Field_Real( &G.dti_array_dev, G.size_blocks_array);
-  #ifndef GRAVITY_GPU
-  Allocate_Particles_Grid_Field_Real( &G.potential_dev, G.n_cells_potential);
-  #endif
-  chprintf( " Allocated GPU memory.\n");
+  #ifdef PARTICLES_GPU
+void Particles3D::Allocate_Memory_GPU()
+{
+  // Allocate arrays for density and gravitational field on the GPU
+
+  Allocate_Particles_Grid_Field_Real(&G.density_dev, G.n_cells);
+  Allocate_Particles_Grid_Field_Real(&G.gravity_x_dev, G.n_cells);
+  Allocate_Particles_Grid_Field_Real(&G.gravity_y_dev, G.n_cells);
+  Allocate_Particles_Grid_Field_Real(&G.gravity_z_dev, G.n_cells);
+  Allocate_Particles_Grid_Field_Real(&G.dti_array_dev, G.size_blocks_array);
+    #ifndef GRAVITY_GPU
+  Allocate_Particles_Grid_Field_Real(&G.potential_dev, G.n_cells_potential);
+    #endif
+  chprintf(" Allocated GPU memory.\n");
 }
 
-part_int_t Particles_3D::Compute_Particles_GPU_Array_Size( part_int_t n ){
-
+part_int_t Particles3D::Compute_Particles_GPU_Array_Size(part_int_t n)
+{
   part_int_t buffer_size = n * G.gpu_allocation_factor;
   return buffer_size;
-
-
 }
 
+    #ifdef MPI_CHOLLA
 
-#ifdef MPI_CHOLLA
-
-void Particles_3D::ReAllocate_Memory_GPU_MPI(){
-  
+void Particles3D::ReAllocate_Memory_GPU_MPI()
+{
   // Free the previous arrays
   Free_GPU_Array_bool(G.transfer_particles_flags_d);
   Free_GPU_Array_int(G.transfer_particles_indices_d);
   Free_GPU_Array_int(G.replace_particles_indices_d);
   Free_GPU_Array_int(G.transfer_particles_prefix_sum_d);
   Free_GPU_Array_int(G.transfer_particles_prefix_sum_blocks_d);
-  
-  //Allocate new resized arrays for the particles MPI transfers
+
+  // Allocate new resized arrays for the particles MPI transfers
   part_int_t buffer_size, half_blocks_size;
-  buffer_size = particles_array_size;
-  half_blocks_size = ( (buffer_size-1)/2   ) / TPB_PARTICLES + 1;
-  Allocate_Particles_GPU_Array_bool( &G.transfer_particles_flags_d,      buffer_size );
-  Allocate_Particles_GPU_Array_int(  &G.transfer_particles_indices_d,    buffer_size );
-  Allocate_Particles_GPU_Array_int(  &G.replace_particles_indices_d,     buffer_size );
-  Allocate_Particles_GPU_Array_int(  &G.transfer_particles_prefix_sum_d, buffer_size );
-  Allocate_Particles_GPU_Array_int(  &G.transfer_particles_prefix_sum_blocks_d, half_blocks_size );
-  printf(" New allocation of arrays for particles transfers   new_size: %d \n", (int)buffer_size   );
-  
+  buffer_size      = particles_array_size;
+  half_blocks_size = ((buffer_size - 1) / 2) / TPB_PARTICLES + 1;
+  Allocate_Particles_GPU_Array_bool(&G.transfer_particles_flags_d, buffer_size);
+  Allocate_Particles_GPU_Array_int(&G.transfer_particles_indices_d, buffer_size);
+  Allocate_Particles_GPU_Array_int(&G.replace_particles_indices_d, buffer_size);
+  Allocate_Particles_GPU_Array_int(&G.transfer_particles_prefix_sum_d, buffer_size);
+  Allocate_Particles_GPU_Array_int(&G.transfer_particles_prefix_sum_blocks_d, half_blocks_size);
+  printf(" New allocation of arrays for particles transfers   new_size: %d \n", (int)buffer_size);
 }
 
-void Particles_3D::Allocate_Memory_GPU_MPI(){
-
-
-  //Allocate memory for the the particles MPI transfers
+void Particles3D::Allocate_Memory_GPU_MPI()
+{
+  // Allocate memory for the the particles MPI transfers
   part_int_t buffer_size, half_blocks_size;
 
-  buffer_size = Compute_Particles_GPU_Array_Size( n_local );
-  half_blocks_size = ( (buffer_size-1)/2   ) / TPB_PARTICLES + 1;
+  buffer_size      = Compute_Particles_GPU_Array_Size(n_local);
+  half_blocks_size = ((buffer_size - 1) / 2) / TPB_PARTICLES + 1;
 
-  Allocate_Particles_GPU_Array_bool( &G.transfer_particles_flags_d,      buffer_size );
-  Allocate_Particles_GPU_Array_int(  &G.transfer_particles_indices_d,    buffer_size );
-  Allocate_Particles_GPU_Array_int(  &G.replace_particles_indices_d,     buffer_size );
-  Allocate_Particles_GPU_Array_int(  &G.transfer_particles_prefix_sum_d, buffer_size );
-  Allocate_Particles_GPU_Array_int(  &G.transfer_particles_prefix_sum_blocks_d, half_blocks_size );
-  Allocate_Particles_GPU_Array_int(  &G.n_transfer_d, 1);
+  Allocate_Particles_GPU_Array_bool(&G.transfer_particles_flags_d, buffer_size);
+  Allocate_Particles_GPU_Array_int(&G.transfer_particles_indices_d, buffer_size);
+  Allocate_Particles_GPU_Array_int(&G.replace_particles_indices_d, buffer_size);
+  Allocate_Particles_GPU_Array_int(&G.transfer_particles_prefix_sum_d, buffer_size);
+  Allocate_Particles_GPU_Array_int(&G.transfer_particles_prefix_sum_blocks_d, half_blocks_size);
+  Allocate_Particles_GPU_Array_int(&G.n_transfer_d, 1);
 
-  G.n_transfer_h = (int *) malloc(sizeof(int));
+  G.n_transfer_h = (int *)malloc(sizeof(int));
 
-  // Used the global particles send/recv buffers that already have been alloctaed in Allocate_MPI_DeviceBuffers_BLOCK
+  // Used the global particles send/recv buffers that already have been
+  // alloctaed in Allocate_MPI_DeviceBuffers_BLOCK
   G.send_buffer_size_x0 = buffer_length_particles_x0_send;
   G.send_buffer_size_x1 = buffer_length_particles_x1_send;
   G.send_buffer_size_y0 = buffer_length_particles_y0_send;
@@ -369,22 +382,20 @@ void Particles_3D::Allocate_Memory_GPU_MPI(){
   G.recv_buffer_y1_d = d_recv_buffer_y1_particles;
   G.recv_buffer_z0_d = d_recv_buffer_z0_particles;
   G.recv_buffer_z1_d = d_recv_buffer_z1_particles;
-
 }
-#endif //MPI_CHOLLA
-
-
-void Particles_3D::Free_Memory_GPU(){
+    #endif  // MPI_CHOLLA
 
+void Particles3D::Free_Memory_GPU()
+{
   Free_GPU_Array_Real(G.density_dev);
   Free_GPU_Array_Real(G.gravity_x_dev);
   Free_GPU_Array_Real(G.gravity_y_dev);
   Free_GPU_Array_Real(G.gravity_z_dev);
   Free_GPU_Array_Real(G.dti_array_dev);
 
-  #ifndef GRAVITY_GPU
+    #ifndef GRAVITY_GPU
   Free_GPU_Array_Real(G.potential_dev);
-  #endif
+    #endif
 
   Free_GPU_Array_Real(pos_x_dev);
   Free_GPU_Array_Real(pos_y_dev);
@@ -395,72 +406,79 @@ void Particles_3D::Free_Memory_GPU(){
   Free_GPU_Array_Real(grav_x_dev);
   Free_GPU_Array_Real(grav_y_dev);
   Free_GPU_Array_Real(grav_z_dev);
+    #ifdef PARTICLE_IDS
+  Free_GPU_Array(partIDs_dev);
+    #endif
+    #ifdef PARTICLE_AGE
+  Free_GPU_Array_Real(age_dev);
+    #endif
+    #ifndef SINGLE_PARTICLE_MASS
+  Free_GPU_Array_Real(mass_dev);
+    #endif
 
-  #ifdef MPI_CHOLLA
-  Free_GPU_Array_bool( G.transfer_particles_flags_d);
-  Free_GPU_Array_int( G.transfer_particles_prefix_sum_d);
-  Free_GPU_Array_int( G.transfer_particles_prefix_sum_blocks_d);
-  Free_GPU_Array_int( G.transfer_particles_indices_d);
-  Free_GPU_Array_int( G.replace_particles_indices_d);
-  Free_GPU_Array_int( G.n_transfer_d);
+    #ifdef MPI_CHOLLA
+  Free_GPU_Array_bool(G.transfer_particles_flags_d);
+  Free_GPU_Array_int(G.transfer_particles_prefix_sum_d);
+  Free_GPU_Array_int(G.transfer_particles_prefix_sum_blocks_d);
+  Free_GPU_Array_int(G.transfer_particles_indices_d);
+  Free_GPU_Array_int(G.replace_particles_indices_d);
+  Free_GPU_Array_int(G.n_transfer_d);
   free(G.n_transfer_h);
 
-  Free_GPU_Array_Real( G.send_buffer_x0_d );
-  Free_GPU_Array_Real( G.send_buffer_x1_d );
-  Free_GPU_Array_Real( G.send_buffer_y0_d );
-  Free_GPU_Array_Real( G.send_buffer_y1_d );
-  Free_GPU_Array_Real( G.send_buffer_z0_d );
-  Free_GPU_Array_Real( G.send_buffer_z1_d );
-
-  Free_GPU_Array_Real( G.recv_buffer_x0_d );
-  Free_GPU_Array_Real( G.recv_buffer_x1_d );
-  Free_GPU_Array_Real( G.recv_buffer_y0_d );
-  Free_GPU_Array_Real( G.recv_buffer_y1_d );
-  Free_GPU_Array_Real( G.recv_buffer_z0_d );
-  Free_GPU_Array_Real( G.recv_buffer_z1_d );
-
-
-  #endif//MPI_CHOLLA
+  Free_GPU_Array_Real(G.send_buffer_x0_d);
+  Free_GPU_Array_Real(G.send_buffer_x1_d);
+  Free_GPU_Array_Real(G.send_buffer_y0_d);
+  Free_GPU_Array_Real(G.send_buffer_y1_d);
+  Free_GPU_Array_Real(G.send_buffer_z0_d);
+  Free_GPU_Array_Real(G.send_buffer_z1_d);
+
+  Free_GPU_Array_Real(G.recv_buffer_x0_d);
+  Free_GPU_Array_Real(G.recv_buffer_x1_d);
+  Free_GPU_Array_Real(G.recv_buffer_y0_d);
+  Free_GPU_Array_Real(G.recv_buffer_y1_d);
+  Free_GPU_Array_Real(G.recv_buffer_z0_d);
+  Free_GPU_Array_Real(G.recv_buffer_z1_d);
+
+    #endif  // MPI_CHOLLA
 }
 
+  #endif  // PARTICLES_GPU
 
-#endif //PARTICLES_GPU
-
-
-void Particles_3D::Initialize_Grid_Values( void ){
-
-  //Initialize density and gravitational field to 0.
+void Particles3D::Initialize_Grid_Values(void)
+{
+  // Initialize density and gravitational field to 0.
 
   int id;
-  for( id=0; id<G.n_cells; id++ ){
+  for (id = 0; id < G.n_cells; id++) {
     G.density[id] = 0;
-    #ifdef PARTICLES_CPU
+  #ifdef PARTICLES_CPU
     G.gravity_x[id] = 0;
     G.gravity_y[id] = 0;
     G.gravity_z[id] = 0;
-    #endif
+  #endif
   }
 }
 
-void Particles_3D::Initialize_Sphere(struct parameters *P){
-
-  //Initialize Random positions for sphere of quasi-uniform density
-  chprintf( " Initializing Particles Uniform Sphere\n");
+void Particles3D::Initialize_Sphere(struct Parameters *P)
+{
+  // Initialize Random positions for sphere of quasi-uniform density
+  chprintf(" Initializing Particles Uniform Sphere\n");
 
   int i, j, k, id;
   Real center_x, center_y, center_z, radius, sphereR;
   center_x = 0.5;
   center_y = 0.5;
-  center_z = 0.5;;
+  center_z = 0.5;
+  ;
   sphereR = 0.2;
 
-  //Set the number of particles equal to the number of grid cells
-  part_int_t n_particles_local = G.nx_local*G.ny_local*G.nz_local;
-  part_int_t n_particles_total = G.nx_total*G.ny_total*G.nz_total;
+  // Set the number of particles equal to the number of grid cells
+  part_int_t n_particles_local = G.nx_local * G.ny_local * G.nz_local;
+  part_int_t n_particles_total = G.nx_total * G.ny_total * G.nz_total;
 
-  //Set the initial density for the particles
+  // Set the initial density for the particles
   Real rho_start = 1;
-  Real M_sphere = 4./3 * M_PI* rho_start * sphereR*sphereR*sphereR;
+  Real M_sphere  = 4. / 3 * M_PI * rho_start * sphereR * sphereR * sphereR;
   Real Mparticle = M_sphere / n_particles_total;
 
   #ifdef SINGLE_PARTICLE_MASS
@@ -469,247 +487,335 @@ void Particles_3D::Initialize_Sphere(struct parameters *P){
 
   #ifdef PARTICLES_GPU
   // Alocate memory in GPU for particle data
-  particles_array_size = Compute_Particles_GPU_Array_Size( n_particles_local );
-  Allocate_Particles_GPU_Array_Real( &pos_x_dev, particles_array_size);
-  Allocate_Particles_GPU_Array_Real( &pos_y_dev, particles_array_size);
-  Allocate_Particles_GPU_Array_Real( &pos_z_dev, particles_array_size);
-  Allocate_Particles_GPU_Array_Real( &vel_x_dev, particles_array_size);
-  Allocate_Particles_GPU_Array_Real( &vel_y_dev, particles_array_size);
-  Allocate_Particles_GPU_Array_Real( &vel_z_dev, particles_array_size);
-  Allocate_Particles_GPU_Array_Real( &grav_x_dev, particles_array_size);
-  Allocate_Particles_GPU_Array_Real( &grav_y_dev, particles_array_size);
-  Allocate_Particles_GPU_Array_Real( &grav_z_dev, particles_array_size);
-  #ifndef SINGLE_PARTICLE_MASS
-  Allocate_Particles_GPU_Array_Real( &mass_dev, particles_array_size);
-  #endif
+  particles_array_size = Compute_Particles_GPU_Array_Size(n_particles_local);
+  Allocate_Particles_GPU_Array_Real(&pos_x_dev, particles_array_size);
+  Allocate_Particles_GPU_Array_Real(&pos_y_dev, particles_array_size);
+  Allocate_Particles_GPU_Array_Real(&pos_z_dev, particles_array_size);
+  Allocate_Particles_GPU_Array_Real(&vel_x_dev, particles_array_size);
+  Allocate_Particles_GPU_Array_Real(&vel_y_dev, particles_array_size);
+  Allocate_Particles_GPU_Array_Real(&vel_z_dev, particles_array_size);
+  Allocate_Particles_GPU_Array_Real(&grav_x_dev, particles_array_size);
+  Allocate_Particles_GPU_Array_Real(&grav_y_dev, particles_array_size);
+  Allocate_Particles_GPU_Array_Real(&grav_z_dev, particles_array_size);
+    #ifndef SINGLE_PARTICLE_MASS
+  Allocate_Particles_GPU_Array_Real(&mass_dev, particles_array_size);
+    #endif
+    #ifdef PARTICLE_IDS
+  Allocate_Particles_GPU_Array_Part_Int(&partIDs_dev, particles_array_size);
+    #endif
   n_local = n_particles_local;
 
-  //Allocate temporal Host arrays for the particles data
-  Real *temp_pos_x  = (Real *) malloc(particles_array_size*sizeof(Real));
-  Real *temp_pos_y  = (Real *) malloc(particles_array_size*sizeof(Real));
-  Real *temp_pos_z  = (Real *) malloc(particles_array_size*sizeof(Real));
-  Real *temp_vel_x  = (Real *) malloc(particles_array_size*sizeof(Real));
-  Real *temp_vel_y  = (Real *) malloc(particles_array_size*sizeof(Real));
-  Real *temp_vel_z  = (Real *) malloc(particles_array_size*sizeof(Real));
-  #ifndef SINGLE_PARTICLE_MASS
-  Real *temp_mass   = (Real *) malloc(particles_array_size*sizeof(Real));
-  #endif
-
-  chprintf( " Allocated GPU memory for particle data\n");
-  #endif //PARTICLES_GPU
+  // Allocate temporal Host arrays for the particles data
+  Real *temp_pos_x = (Real *)malloc(particles_array_size * sizeof(Real));
+  Real *temp_pos_y = (Real *)malloc(particles_array_size * sizeof(Real));
+  Real *temp_pos_z = (Real *)malloc(particles_array_size * sizeof(Real));
+  Real *temp_vel_x = (Real *)malloc(particles_array_size * sizeof(Real));
+  Real *temp_vel_y = (Real *)malloc(particles_array_size * sizeof(Real));
+  Real *temp_vel_z = (Real *)malloc(particles_array_size * sizeof(Real));
+    #ifndef SINGLE_PARTICLE_MASS
+  Real *temp_mass = (Real *)malloc(particles_array_size * sizeof(Real));
+    #endif
+    #ifdef PARTICLE_IDS
+  auto *temp_id = (part_int_t *)malloc(particles_array_size * sizeof(part_int_t));
+    #endif
 
+  chprintf(" Allocated GPU memory for particle data\n");
+  #endif  // PARTICLES_GPU
 
-  chprintf( " Initializing Random Positions\n");
+  chprintf(" Initializing Random Positions\n");
 
   part_int_t pID = 0;
   Real pPos_x, pPos_y, pPos_z, r;
   std::mt19937_64 generator(P->prng_seed);
-  std::uniform_real_distribution<Real> xPositionPrng(G.xMin, G.xMax );
-  std::uniform_real_distribution<Real> yPositionPrng(G.yMin, G.yMax );
-  std::uniform_real_distribution<Real> zPositionPrng(G.zMin, G.zMax );
-  while ( pID < n_particles_local ){
+  std::uniform_real_distribution<Real> xPositionPrng(G.xMin, G.xMax);
+  std::uniform_real_distribution<Real> yPositionPrng(G.yMin, G.yMax);
+  std::uniform_real_distribution<Real> zPositionPrng(G.zMin, G.zMax);
+  while (pID < n_particles_local) {
     pPos_x = xPositionPrng(generator);
     pPos_y = yPositionPrng(generator);
     pPos_z = zPositionPrng(generator);
 
-    r = sqrt( (pPos_x-center_x)*(pPos_x-center_x) + (pPos_y-center_y)*(pPos_y-center_y) + (pPos_z-center_z)*(pPos_z-center_z) );
-    if ( r > sphereR ) continue;
+    r = sqrt((pPos_x - center_x) * (pPos_x - center_x) + (pPos_y - center_y) * (pPos_y - center_y) +
+             (pPos_z - center_z) * (pPos_z - center_z));
+    if (r > sphereR) {
+      continue;
+    }
 
-    #ifdef PARTICLES_CPU
-    //Copy the particle data to the particles vectors
-    pos_x.push_back( pPos_x );
-    pos_y.push_back( pPos_y );
-    pos_z.push_back( pPos_z);
-    vel_x.push_back( 0.0 );
-    vel_y.push_back( 0.0 );
-    vel_z.push_back( 0.0 );
-    grav_x.push_back( 0.0 );
-    grav_y.push_back( 0.0 );
-    grav_z.push_back( 0.0 );
+  #ifdef PARTICLES_CPU
+    // Copy the particle data to the particles vectors
+    pos_x.push_back(pPos_x);
+    pos_y.push_back(pPos_y);
+    pos_z.push_back(pPos_z);
+    vel_x.push_back(0.0);
+    vel_y.push_back(0.0);
+    vel_z.push_back(0.0);
+    grav_x.push_back(0.0);
+    grav_y.push_back(0.0);
+    grav_z.push_back(0.0);
     #ifdef PARTICLE_IDS
-    partIDs.push_back( pID );
+    partIDs.push_back(pID);
     #endif
     #ifndef SINGLE_PARTICLE_MASS
-    mass.push_back( Mparticle );
+    mass.push_back(Mparticle);
     #endif
-    #endif //PARTICLES_CPU
+  #endif  // PARTICLES_CPU
 
-    #ifdef PARTICLES_GPU
+  #ifdef PARTICLES_GPU
     // Copy the particle data to the temporal Host Buffers
-    temp_pos_x[pID]  = pPos_x;
-    temp_pos_y[pID]  = pPos_y;
-    temp_pos_z[pID]  = pPos_z;
-    temp_vel_x[pID]  = 0.0;
-    temp_vel_y[pID]  = 0.0;
-    temp_vel_z[pID]  = 0.0;
+    temp_pos_x[pID] = pPos_x;
+    temp_pos_y[pID] = pPos_y;
+    temp_pos_z[pID] = pPos_z;
+    temp_vel_x[pID] = 0.0;
+    temp_vel_y[pID] = 0.0;
+    temp_vel_z[pID] = 0.0;
     #ifndef SINGLE_PARTICLE_MASS
-    temp_mass[pID]  = Mparticle;
+    temp_mass[pID] = Mparticle;
+    #endif
+    #ifdef PARTICLE_IDS
+    temp_id[pID] = pID;
     #endif
-    #endif //PARTICLES_GPU
+  #endif  // PARTICLES_GPU
 
     pID += 1;
   }
 
   #ifdef PARTICLES_CPU
   n_local = pos_x.size();
-  #endif //PARTICLES_CPU
+  #endif  // PARTICLES_CPU
 
   #if defined(PARTICLE_IDS) && defined(MPI_CHOLLA)
-  // Get global IDs: Offset the local IDs to get unique global IDs across the MPI ranks
-  chprintf( " Computing Global Particles IDs offset \n" );
+  // Get global IDs: Offset the local IDs to get unique global IDs across the
+  // MPI ranks
+  chprintf(" Computing Global Particles IDs offset \n");
   part_int_t global_id_offset;
-  global_id_offset = Get_Particles_IDs_Global_MPI_Offset( n_local );
-  #ifdef PARTICLES_CPU
-  for ( int p_indx=0; p_indx<n_local; p_indx++ ){
+  global_id_offset = Get_Particles_IDs_Global_MPI_Offset(n_local);
+    #ifdef PARTICLES_CPU
+  for (int p_indx = 0; p_indx < n_local; p_indx++) {
     partIDs[p_indx] += global_id_offset;
   }
-  #endif//PARTICLES_CPU
-  #ifdef PARTICLES_GPU
-  //Particles IDs not implemented for PARTICLES_GPU yet
-  #endif//PARTICLES_GPU
-  #endif//PARTICLE_IDS and MPI_CHOLLA
+    #endif  // PARTICLES_CPU
+    #ifdef PARTICLES_GPU
+  for (int p_indx = 0; p_indx < n_local; p_indx++) {
+    temp_id[p_indx] += global_id_offset;
+  }
+    #endif  // PARTICLES_GPU
+  #endif    // PARTICLE_IDS and MPI_CHOLLA
 
   #ifdef PARTICLES_GPU
-  //Copyt the particle data from tepmpotal Host buffer to GPU memory
-  Copy_Particles_Array_Real_Host_to_Device( temp_pos_x, pos_x_dev, n_local);
-  Copy_Particles_Array_Real_Host_to_Device( temp_pos_y, pos_y_dev, n_local);
-  Copy_Particles_Array_Real_Host_to_Device( temp_pos_z, pos_z_dev, n_local);
-  Copy_Particles_Array_Real_Host_to_Device( temp_vel_x, vel_x_dev, n_local);
-  Copy_Particles_Array_Real_Host_to_Device( temp_vel_y, vel_y_dev, n_local);
-  Copy_Particles_Array_Real_Host_to_Device( temp_vel_z, vel_z_dev, n_local);
-  #ifndef SINGLE_PARTICLE_MASS
-  Copy_Particles_Array_Real_Host_to_Device( temp_mass, mass_dev, n_local);
-  #endif
-
-  //Free the temporal host buffers
-  free( temp_pos_x );
-  free( temp_pos_y );
-  free( temp_pos_z );
-  free( temp_vel_x );
-  free( temp_vel_y );
-  free( temp_vel_z );
-  #ifndef SINGLE_PARTICLE_MASS
-  free( temp_mass );
-  #endif
-  #endif //PARTICLES_GPU
+  // Copyt the particle data from tepmpotal Host buffer to GPU memory
+  Copy_Particles_Array_Real_Host_to_Device(temp_pos_x, pos_x_dev, n_local);
+  Copy_Particles_Array_Real_Host_to_Device(temp_pos_y, pos_y_dev, n_local);
+  Copy_Particles_Array_Real_Host_to_Device(temp_pos_z, pos_z_dev, n_local);
+  Copy_Particles_Array_Real_Host_to_Device(temp_vel_x, vel_x_dev, n_local);
+  Copy_Particles_Array_Real_Host_to_Device(temp_vel_y, vel_y_dev, n_local);
+  Copy_Particles_Array_Real_Host_to_Device(temp_vel_z, vel_z_dev, n_local);
+    #ifndef SINGLE_PARTICLE_MASS
+  Copy_Particles_Array_Real_Host_to_Device(temp_mass, mass_dev, n_local);
+    #endif
+    #ifdef PARTICLE_IDS
+  Copy_Particles_Array_Int_Host_to_Device(temp_id, partIDs_dev, n_local);
+    #endif
 
-  chprintf( " Particles Uniform Sphere Initialized, n_local: %lu\n", n_local);
+  // Free the temporal host buffers
+  free(temp_pos_x);
+  free(temp_pos_y);
+  free(temp_pos_z);
+  free(temp_vel_x);
+  free(temp_vel_y);
+  free(temp_vel_z);
+    #ifndef SINGLE_PARTICLE_MASS
+  free(temp_mass);
+    #endif
+    #ifdef PARTICLE_IDS
+  free(temp_id);
+    #endif
+  #endif  // PARTICLES_GPU
 
+  chprintf(" Particles Uniform Sphere Initialized, n_local: %lu\n", n_local);
 }
 
-
+  #if defined(PARTICLE_AGE) && !defined(SINGLE_PARTICLE_MASS) && defined(PARTICLE_IDS)
 /**
- *   Initializes a disk population of uniform mass (\f$(10^4 M_\odot)\f$) stellar clusters
+ *   Initializes a disk population of uniform mass stellar clusters
  */
-void Particles_3D::Initialize_Disk_Stellar_Clusters(struct parameters *P) {
-  #ifdef PARTICLES_GPU
-      chprintf( " Initialize_Disk_Stellar_Clusters: PARTICLES_GPU not currently supported\n");
-      chexit(-1);
-  #endif
-  #ifndef SINGLE_PARTICLE_MASS
-      chprintf( " Initialize_Disk_Stellar_Clusters: only SINGLE_PARTICLE_MASS currently supported\n");
-      chexit(-1);
-  #endif
-  chprintf( " Initializing Particles Stellar Disk\n");
+void Particles3D::Initialize_Disk_Stellar_Clusters(struct Parameters *P)
+{
+  chprintf(" Initializing Particles Stellar Disk\n");
 
   // Set up the PRNG
   std::mt19937_64 generator(P->prng_seed);
 
-  std::gamma_distribution<Real> radialDist(2,1);           //for generating cyclindrical radii
-  std::uniform_real_distribution<Real> zDist(0, 1);        //for generating height above/below the disk.
-  std::uniform_real_distribution<Real> phiDist(0, 2*M_PI); //for generating phi
-  std::normal_distribution<Real> speedDist(0, 1);          //for generating random speeds.
-
-  Real M_d = Galaxies::MW.getM_d(); // MW disk mass in M_sun (assumed to be all in stars)
-  Real R_d = Galaxies::MW.getR_d(); // MW stellar disk scale length in kpc
-  Real Z_d = Galaxies::MW.getZ_d(); // MW stellar height scale length in kpc
-  Real R_max = sqrt(P->xlen*P->xlen + P->ylen*P->ylen)/2;
-  R_max = P->xlen / 2.0;
+  std::gamma_distribution<Real> radialDist(2, 1);  // for generating cyclindrical radii
+  std::uniform_real_distribution<Real> zDist(-0.005, 0.005);
+  std::uniform_real_distribution<Real> vzDist(-1e-8, 1e-8);
+  std::uniform_real_distribution<Real> phiDist(0,
+                                               2 * M_PI);  // for generating phi
+  std::normal_distribution<Real> speedDist(0,
+                                           1);  // for generating random speeds.
+
+  Real M_d   = galaxies::MW.getM_d();  // MW disk mass in M_sun (assumed to be all in stars)
+  Real R_d   = galaxies::MW.getR_d();  // MW stellar disk scale length in kpc
+  Real Z_d   = galaxies::MW.getZ_d();  // MW stellar height scale length in kpc
+  Real R_max = sqrt(P->xlen * P->xlen + P->ylen * P->ylen) / 2;
+  R_max      = P->xlen / 2.0;
+
+  real_vector_t temp_pos_x;
+  real_vector_t temp_pos_y;
+  real_vector_t temp_pos_z;
+  real_vector_t temp_vel_x;
+  real_vector_t temp_vel_y;
+  real_vector_t temp_vel_z;
+  real_vector_t temp_grav_x;
+  real_vector_t temp_grav_y;
+  real_vector_t temp_grav_z;
+  real_vector_t temp_mass;
+  int_vector_t temp_ids;
+  real_vector_t temp_age;
 
   Real x, y, z, R, phi;
   Real vx, vy, vz, vel, ac;
   Real expFactor, vR_rms, vR, vPhi_str, vPhi, v_c2, vPhi_rand_rms, kappa2;
-  particle_mass = 1e4;  //solar masses
-  //unsigned long int N = (long int)(6.5e6 * 0.11258580827352116);  //2kpc radius
-  unsigned long int N = (long int)(6.5e6 * 0.9272485558395908);   // 15kpc radius
-  long lost_particles = 0;
-  for ( unsigned long int i = 0; i < N; i++ ){
-      do {
-          R = R_d*radialDist(generator);
-      } while (R > R_max);
-
-      phi = phiDist(generator);
-      x = R * cos(phi);
-      y = R * sin(phi);
-      z = 0;
-
-      if (x < G.xMin || x >= G.xMax) continue;
-      if (y < G.yMin || y >= G.yMax) continue;
-      if (z < G.zMin || z >= G.zMax) continue;
-
-      ac  = fabs(Galaxies::MW.gr_disk_D3D(R, 0) + Galaxies::MW.gr_halo_D3D(R, 0));
-      vPhi = sqrt(R*ac);
-
-      vx =  -vPhi*sin(phi);
-      vy =  vPhi*cos(phi);
-      vz = 0;
-
-      #ifdef PARTICLES_CPU
-      //Copy the particle data to the particles vectors
-      pos_x.push_back(x);
-      pos_y.push_back(y);
-      pos_z.push_back(z);
-      vel_x.push_back(vx);
-      vel_y.push_back(vy);
-      vel_z.push_back(vz);
-      grav_x.push_back(0.0);
-      grav_y.push_back(0.0);
-      grav_z.push_back(0.0);
-
-      #ifdef PARTICLE_IDS
-      partIDs.push_back(i);
-      #endif //PARTICLE_IDS
-
-      #ifdef PARTICLE_AGE
-      //if (fabs(z) >= Z_d) age.push_back(1.1e4);
-      //else age.push_back(0.0);
-      age.push_back(0.0);
-      #endif
-
-      #endif//PARTICLES_CPU
-  }
+  // unsigned long int N = (long int)(6.5e6 * 0.11258580827352116);  //2kpc
+  // radius unsigned long int N = 13; //(long int)(6.5e6 * 0.9272485558395908);
+  // // 15kpc radius
+  Real total_mass               = 0;
+  Real upper_limit_cluster_mass = 1e7;
+  long lost_particles           = 0;
+  part_int_t id                 = -1;
+  while (total_mass < upper_limit_cluster_mass) {
+    Real cluster_mass = galaxies::MW.singleClusterMass(generator);
+    total_mass += cluster_mass;
+    id += 1;  // do this here before we check whether the particle is in the MPI
+              // domain, otherwise could end up with duplicated IDs
+    do {
+      R = R_d * radialDist(generator);
+    } while (R > R_max);
+
+    phi = phiDist(generator);
+    x   = R * cos(phi);
+    y   = R * sin(phi);
+    z   = zDist(generator);
+
+    if (x < G.xMin || x >= G.xMax) {
+      continue;
+    }
+    if (y < G.yMin || y >= G.yMax) {
+      continue;
+    }
+    if (z < G.zMin || z >= G.zMax) {
+      continue;
+    }
 
-  #ifdef PARTICLES_CPU
-  n_local = pos_x.size();
-  #endif
+    ac   = fabs(galaxies::MW.gr_disk_D3D(R, 0) + galaxies::MW.gr_halo_D3D(R, 0));
+    vPhi = sqrt(R * ac);
+
+    vx = -vPhi * sin(phi);
+    vy = vPhi * cos(phi);
+    vz = 0.0;  // vzDist(generator);
+
+    // add particle data to the particles vectors
+    temp_pos_x.push_back(x);
+    temp_pos_y.push_back(y);
+    temp_pos_z.push_back(z);
+    temp_vel_x.push_back(vx);
+    temp_vel_y.push_back(vy);
+    temp_vel_z.push_back(vz);
+    temp_grav_x.push_back(0.0);
+    temp_grav_y.push_back(0.0);
+    temp_grav_z.push_back(0.0);
+    temp_mass.push_back(cluster_mass);
+    temp_age.push_back(0.0);
+    temp_ids.push_back(id);
+  }
 
-  if (lost_particles > 0) chprintf("  lost %lu particles\n", lost_particles);
-  chprintf( " Stellar Disk Particles Initialized, n_local: %lu\n", n_local);
-}
+  n_local = temp_pos_x.size();
+
+    /*
+      part_int_t global_id_offset = 0;
+      #ifdef MPI_CHOLLA
+      // Get global IDs: Offset the local IDs to get unique global IDs across
+      the MPI ranks chprintf( " Computing Global Particles IDs offset \n" );
+      global_id_offset = Get_Particles_IDs_Global_MPI_Offset( n_local );
+      #endif //MPI_CHOLLA
+      for ( int i=0; i<n_local; i++ ){
+        temp_ids.push_back( i + global_id_offset);
+      }
+      */
 
+    #ifdef PARTICLES_CPU
+  pos_x   = temp_pos_x;
+  pos_y   = temp_pos_y;
+  pos_z   = temp_pos_z;
+  vel_x   = temp_vel_x;
+  vel_y   = temp_vel_y;
+  vel_z   = temp_vel_z;
+  grav_x  = temp_grav_x;
+  grav_y  = temp_grav_y;
+  grav_z  = temp_grav_z;
+  mass    = temp_mass;
+  partIDs = temp_ids;
+  age     = temp_age;
+    #endif  // PARTICLES_CPU
 
-void Particles_3D::Initialize_Zeldovich_Pancake( struct parameters *P ){
+    #ifdef PARTICLES_GPU
+  particles_array_size = Compute_Particles_GPU_Array_Size(n_local);
+  Allocate_Particles_GPU_Array_Real(&pos_x_dev, particles_array_size);
+  Copy_Particles_Array_Real_Host_to_Device(temp_pos_x.data(), pos_x_dev, n_local);
+  Allocate_Particles_GPU_Array_Real(&pos_y_dev, particles_array_size);
+  Copy_Particles_Array_Real_Host_to_Device(temp_pos_y.data(), pos_y_dev, n_local);
+  Allocate_Particles_GPU_Array_Real(&pos_z_dev, particles_array_size);
+  Copy_Particles_Array_Real_Host_to_Device(temp_pos_z.data(), pos_z_dev, n_local);
+  Allocate_Particles_GPU_Array_Real(&vel_x_dev, particles_array_size);
+  Copy_Particles_Array_Real_Host_to_Device(temp_vel_x.data(), vel_x_dev, n_local);
+  Allocate_Particles_GPU_Array_Real(&vel_y_dev, particles_array_size);
+  Copy_Particles_Array_Real_Host_to_Device(temp_vel_y.data(), vel_y_dev, n_local);
+  Allocate_Particles_GPU_Array_Real(&vel_z_dev, particles_array_size);
+  Copy_Particles_Array_Real_Host_to_Device(temp_vel_z.data(), vel_z_dev, n_local);
+  Allocate_Particles_GPU_Array_Real(&grav_x_dev, particles_array_size);
+  Copy_Particles_Array_Real_Host_to_Device(temp_grav_x.data(), grav_x_dev, n_local);
+  Allocate_Particles_GPU_Array_Real(&grav_y_dev, particles_array_size);
+  Copy_Particles_Array_Real_Host_to_Device(temp_grav_y.data(), grav_y_dev, n_local);
+  Allocate_Particles_GPU_Array_Real(&grav_z_dev, particles_array_size);
+  Copy_Particles_Array_Real_Host_to_Device(temp_grav_z.data(), grav_z_dev, n_local);
+  Allocate_Particles_GPU_Array_Real(&mass_dev, particles_array_size);
+  Copy_Particles_Array_Real_Host_to_Device(temp_mass.data(), mass_dev, n_local);
+  Allocate_Particles_GPU_Array_Part_Int(&partIDs_dev, particles_array_size);
+  Copy_Particles_Array_Int_Host_to_Device(temp_ids.data(), partIDs_dev, n_local);
+  Allocate_Particles_GPU_Array_Real(&age_dev, particles_array_size);
+  Copy_Particles_Array_Real_Host_to_Device(temp_age.data(), age_dev, n_local);
+    #endif  // PARTICLES_GPU
+
+  if (lost_particles > 0) {
+    chprintf("  lost %lu particles\n", lost_particles);
+  }
+  chprintf(
+      "Stellar Disk Particles Initialized, n_total: %lu, n_local: %lu, "
+      "total_mass: %.3e s.m.\n",
+      id + 1, n_local, total_mass);
+}
+  #endif
 
-  //No partidcles for the Zeldovich Pancake problem. n_local=0
+void Particles3D::Initialize_Zeldovich_Pancake(struct Parameters *P)
+{
+  // No particles for the Zeldovich Pancake problem. n_local=0
 
   chprintf("Setting Zeldovich Pancake initial conditions...\n");
 
   // n_local = pos_x.size();
   n_local = 0;
 
-  chprintf( " Particles Zeldovich Pancake Initialized, n_local: %lu\n", n_local);
-
+  chprintf(" Particles Zeldovich Pancake Initialized, n_local: %lu\n", n_local);
 }
 
-
-void Grid3D::Initialize_Uniform_Particles(){
-  //Initialize positions assigning one particle at each cell in a uniform grid
+void Grid3D::Initialize_Uniform_Particles()
+{
+  // Initialize positions assigning one particle at each cell in a uniform grid
 
   int i, j, k, id;
   Real x_pos, y_pos, z_pos;
 
   Real dVol, Mparticle;
-  dVol = H.dx * H.dy * H.dz;
+  dVol      = H.dx * H.dy * H.dz;
   Mparticle = dVol;
 
   #ifdef SINGLE_PARTICLE_MASS
@@ -717,31 +823,31 @@ void Grid3D::Initialize_Uniform_Particles(){
   #endif
 
   part_int_t pID = 0;
-  for (k=H.n_ghost; k<H.nz-H.n_ghost; k++) {
-    for (j=H.n_ghost; j<H.ny-H.n_ghost; j++) {
-      for (i=H.n_ghost; i<H.nx-H.n_ghost; i++) {
-        id = i + j*H.nx + k*H.nx*H.ny;
+  for (k = H.n_ghost; k < H.nz - H.n_ghost; k++) {
+    for (j = H.n_ghost; j < H.ny - H.n_ghost; j++) {
+      for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) {
+        id = i + j * H.nx + k * H.nx * H.ny;
 
         // // get the centered cell positions at (i,j,k)
         Get_Position(i, j, k, &x_pos, &y_pos, &z_pos);
 
-        #ifdef PARTICLES_CPU
-        Particles.pos_x.push_back( x_pos - 0.25*H.dx );
-        Particles.pos_y.push_back( y_pos - 0.25*H.dy );
-        Particles.pos_z.push_back( z_pos - 0.25*H.dz );
-        Particles.vel_x.push_back( 0.0 );
-        Particles.vel_y.push_back( 0.0 );
-        Particles.vel_z.push_back( 0.0 );
-        Particles.grav_x.push_back( 0.0 );
-        Particles.grav_y.push_back( 0.0 );
-        Particles.grav_z.push_back( 0.0 );
-        #ifdef PARTICLE_IDS
-        Particles.partIDs.push_back( pID );
-        #endif
-        #ifndef SINGLE_PARTICLE_MASS
-        Particles.mass.push_back( Mparticle );
-        #endif
-        #endif //PARTICLES_CPU
+  #ifdef PARTICLES_CPU
+        Particles.pos_x.push_back(x_pos - 0.25 * H.dx);
+        Particles.pos_y.push_back(y_pos - 0.25 * H.dy);
+        Particles.pos_z.push_back(z_pos - 0.25 * H.dz);
+        Particles.vel_x.push_back(0.0);
+        Particles.vel_y.push_back(0.0);
+        Particles.vel_z.push_back(0.0);
+        Particles.grav_x.push_back(0.0);
+        Particles.grav_y.push_back(0.0);
+        Particles.grav_z.push_back(0.0);
+    #ifdef PARTICLE_IDS
+        Particles.partIDs.push_back(pID);
+    #endif
+    #ifndef SINGLE_PARTICLE_MASS
+        Particles.mass.push_back(Mparticle);
+    #endif
+  #endif  // PARTICLES_CPU
 
         pID += 1;
       }
@@ -752,20 +858,19 @@ void Grid3D::Initialize_Uniform_Particles(){
   Particles.n_local = Particles.pos_x.size();
   #endif
 
-
   #ifdef MPI_CHOLLA
   Particles.n_total_initial = ReducePartIntSum(Particles.n_local);
   #else
   Particles.n_total_initial = Particles.n_local;
   #endif
 
-  chprintf( " Particles Uniform Grid Initialized, n_local: %lu, n_total: %lu\n", Particles.n_local, Particles.n_total_initial );
+  chprintf(" Particles Uniform Grid Initialized, n_local: %lu, n_total: %lu\n", Particles.n_local,
+           Particles.n_total_initial);
 }
 
-
-void Particles_3D::Free_Memory(void){
-
-  //Free the particles arrays
+void Particles3D::Free_Memory(void)
+{
+  // Free the particles arrays
   free(G.density);
 
   #ifdef PARTICLES_CPU
@@ -773,7 +878,7 @@ void Particles_3D::Free_Memory(void){
   free(G.gravity_y);
   free(G.gravity_z);
 
-  //Free the particles vectors
+  // Free the particles vectors
   pos_x.clear();
   pos_y.clear();
   pos_z.clear();
@@ -784,19 +889,19 @@ void Particles_3D::Free_Memory(void){
   grav_y.clear();
   grav_z.clear();
 
-  #ifdef PARTICLE_IDS
+    #ifdef PARTICLE_IDS
   partIDs.clear();
-  #endif
+    #endif
 
-  #ifndef SINGLE_PARTICLE_MASS
+    #ifndef SINGLE_PARTICLE_MASS
   mass.clear();
-  #endif
-
-  #endif //PARTICLES_CPU
+    #endif
 
+  #endif  // PARTICLES_CPU
 }
 
-void Particles_3D::Reset( void ){
+void Particles3D::Reset(void)
+{
   Free_Memory();
 
   #ifdef PARTICLES_GPU
@@ -805,5 +910,4 @@ void Particles_3D::Reset( void ){
   #endif
 }
 
-
-#endif//PARTICLES
+#endif  // PARTICLES
diff --git a/src/particles/particles_3D.h b/src/particles/particles_3D.h
index 7c8990f8a..58f2137eb 100644
--- a/src/particles/particles_3D.h
+++ b/src/particles/particles_3D.h
@@ -1,30 +1,29 @@
 #ifdef PARTICLES
 
-#ifndef PARTICLES_H
-#define PARTICLES_H
+  #ifndef PARTICLES_H
+    #define PARTICLES_H
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <cstdlib>
-#include <string.h>
-#include "../global/global.h"
-#include "../gravity/grav3D.h"
+    #include <math.h>
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include <string.h>
 
-#ifdef PARTICLES_GPU
-#define TPB_PARTICLES 1024
-// #define PRINT_GPU_MEMORY
-#define PRINT_MAX_MEMORY_USAGE
-#endif
+    #include <cstdlib>
 
+    #include "../global/global.h"
+    #include "../gravity/grav3D.h"
 
+    #ifdef PARTICLES_GPU
+      #define TPB_PARTICLES 1024
+      // #define PRINT_GPU_MEMORY
+      #define PRINT_MAX_MEMORY_USAGE
+    #endif
 
 /*! \class Part3D
  *  \brief Class to create a set of particles in 3D space. */
-class Particles_3D
+class Particles3D
 {
-  public:
-
+ public:
   part_int_t n_local;
 
   part_int_t n_total;
@@ -40,22 +39,21 @@ class Particles_3D
 
   Real particle_mass;
 
-  #ifdef COSMOLOGY
+    #ifdef COSMOLOGY
   Real current_z;
   Real current_a;
-  #endif
-
+    #endif
 
-  #ifdef PARTICLES_CPU
-  #ifdef PARTICLE_IDS
+    #ifdef PARTICLES_CPU
+      #ifdef PARTICLE_IDS
   int_vector_t partIDs;
-  #endif
-  #ifndef SINGLE_PARTICLE_MASS
+      #endif
+      #ifndef SINGLE_PARTICLE_MASS
   real_vector_t mass;
-  #endif
-  #ifdef PARTICLE_AGE
+      #endif
+      #ifdef PARTICLE_AGE
   real_vector_t age;
-  #endif
+      #endif
   real_vector_t pos_x;
   real_vector_t pos_y;
   real_vector_t pos_z;
@@ -65,13 +63,16 @@ class Particles_3D
   real_vector_t grav_x;
   real_vector_t grav_y;
   real_vector_t grav_z;
-  #endif //PARTICLES_CPU
+    #endif  // PARTICLES_CPU
 
-  #ifdef PARTICLES_GPU
+    #ifdef PARTICLES_GPU
   part_int_t particles_array_size;
-  #ifdef PARTICLE_IDS
+      #ifdef PARTICLE_IDS
   part_int_t *partIDs_dev;
-  #endif
+      #endif
+      #ifdef PARTICLE_AGE
+  Real *age_dev;
+      #endif
   Real *mass_dev;
   Real *pos_x_dev;
   Real *pos_y_dev;
@@ -83,11 +84,9 @@ class Particles_3D
   Real *grav_y_dev;
   Real *grav_z_dev;
 
+    #endif  // PARTICLES_GPU
 
-  #endif //PARTICLES_GPU
-
-
-  #ifdef MPI_CHOLLA
+    #ifdef MPI_CHOLLA
 
   part_int_t n_transfer_x0;
   part_int_t n_transfer_x1;
@@ -117,26 +116,21 @@ class Particles_3D
   part_int_t n_in_buffer_z0;
   part_int_t n_in_buffer_z1;
 
-
-  #ifdef PARTICLES_CPU
+      #ifdef PARTICLES_CPU
   int_vector_t out_indxs_vec_x0;
   int_vector_t out_indxs_vec_x1;
   int_vector_t out_indxs_vec_y0;
   int_vector_t out_indxs_vec_y1;
   int_vector_t out_indxs_vec_z0;
   int_vector_t out_indxs_vec_z1;
-  #endif //PARTICLES_CPU
-
+      #endif  // PARTICLES_CPU
 
-  #endif //MPI_CHOLLA
+    #endif  // MPI_CHOLLA
 
   bool TRANSFER_DENSITY_BOUNDARIES;
   bool TRANSFER_PARTICLES_BOUNDARIES;
 
-
-  struct Grid
-  {
-
+  struct Grid {
     int nx_local, ny_local, nz_local;
     int nx_total, ny_total, nz_total;
 
@@ -165,11 +159,10 @@ class Particles_3D
     Real *gravity_x;
     Real *gravity_y;
     Real *gravity_z;
-    #ifdef GRAVITY_GPU
+      #ifdef GRAVITY_GPU
     Real *density_dev;
+      #endif
     #endif
-    #endif
-    
 
     #ifdef PARTICLES_GPU
     Real *density_dev;
@@ -180,7 +173,7 @@ class Particles_3D
     Real *dti_array_dev;
     Real *dti_array_host;
 
-    #ifdef MPI_CHOLLA
+      #ifdef MPI_CHOLLA
     bool *transfer_particles_flags_d;
     int *transfer_particles_indices_d;
     int *replace_particles_indices_d;
@@ -215,72 +208,104 @@ class Particles_3D
     Real *recv_buffer_z0_d;
     Real *recv_buffer_z1_d;
 
-    #endif // MPI_CHOLLA
-
-    #endif //PARTICLES_GPU
+      #endif  // MPI_CHOLLA
 
+    #endif  // PARTICLES_GPU
 
   } G;
 
-  Particles_3D(void);
+  Particles3D(void);
 
-  void Initialize( struct parameters *P, Grav3D &Grav,  Real xbound, Real ybound, Real zbound, Real xdglobal, Real ydglobal, Real zdglobal  );
+  void Initialize(struct Parameters *P, Grav3D &Grav, Real xbound, Real ybound, Real zbound, Real xdglobal,
+                  Real ydglobal, Real zdglobal);
 
-  void Allocate_Particles_Grid_Field_Real( Real **array_dev, int size );
-  void Free_GPU_Array_Real( Real *array );
-  
-  #ifdef PARTICLES_GPU
+  void Allocate_Particles_Grid_Field_Real(Real **array_dev, int size);
+  void Free_GPU_Array_Real(Real *array);
+
+    #ifdef PARTICLES_GPU
 
-  void Free_GPU_Array_int( int *array );
-  void Free_GPU_Array_bool( bool *array );
+  void Free_GPU_Array_int(int *array);
+  void Free_GPU_Array_bool(bool *array);
+  template <typename T>
+  void Free_GPU_Array(T *array)
+  {
+    cudaFree(array);
+  }  // TODO remove the Free_GPU_Array_<type> functions
   void Allocate_Memory_GPU();
-  void Allocate_Particles_GPU_Array_Real( Real **array_dev, part_int_t size );
-  void Allocate_Particles_GPU_Array_bool( bool **array_dev, part_int_t size );
-  void Allocate_Particles_GPU_Array_int( int **array_dev, part_int_t size );
-  void Copy_Particles_Array_Real_Host_to_Device( Real *array_host, Real *array_dev, part_int_t size);
-  void Copy_Particles_Array_Real_Device_to_Host( Real *array_dev, Real *array_host, part_int_t size);
-  void Set_Particles_Array_Real( Real value, Real *array_dev, part_int_t size);
+  void Allocate_Particles_GPU_Array_Real(Real **array_dev, part_int_t size);
+  void Allocate_Particles_GPU_Array_bool(bool **array_dev, part_int_t size);
+  void Allocate_Particles_GPU_Array_int(int **array_dev, part_int_t size);
+  void Allocate_Particles_GPU_Array_Part_Int(part_int_t **array_dev, part_int_t size);
+  void Copy_Particles_Array_Real_Host_to_Device(Real *array_host, Real *array_dev, part_int_t size);
+  void Copy_Particles_Array_Real_Device_to_Host(Real *array_dev, Real *array_host, part_int_t size);
+  void Copy_Particles_Array_Int_Host_to_Device(part_int_t *array_host, part_int_t *array_dev, part_int_t size);
+  void Copy_Particles_Array_Int_Device_to_Host(part_int_t *array_dev, part_int_t *array_host, part_int_t size);
+  void Set_Particles_Array_Real(Real value, Real *array_dev, part_int_t size);
   void Free_Memory_GPU();
   void Initialize_Grid_Values_GPU();
   void Get_Density_CIC_GPU();
-  void Get_Density_CIC_GPU_function(part_int_t n_local, Real particle_mass,  Real xMin, Real xMax, Real yMin, Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, int n_cells, Real *density_h, Real *density_dev, Real *pos_x_dev, Real *pos_y_dev , Real *pos_z_dev, Real *mass_dev);
+  void Get_Density_CIC_GPU_function(part_int_t n_local, Real particle_mass, Real xMin, Real xMax, Real yMin, Real yMax,
+                                    Real zMin, Real zMax, Real dx, Real dy, Real dz, int nx_local, int ny_local,
+                                    int nz_local, int n_ghost_particles_grid, int n_cells, Real *density_h,
+                                    Real *density_dev, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev,
+                                    Real *mass_dev);
   void Clear_Density_GPU();
-  void Clear_Density_GPU_function( Real *density_dev, int n_cells);
-  void Copy_Potential_To_GPU( Real *potential_host, Real *potential_dev, int n_cells_potential );
-  void Get_Gravity_Field_Particles_GPU( Real *potential_host );
-  void Get_Gravity_Field_Particles_GPU_function( int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, int n_cells_potential, Real dx, Real dy, Real dz,  Real *potential_host, Real *potential_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev  );
+  void Clear_Density_GPU_function(Real *density_dev, int n_cells);
+  void Copy_Potential_To_GPU(Real *potential_host, Real *potential_dev, int n_cells_potential);
+  void Get_Gravity_Field_Particles_GPU(Real *potential_host);
+  void Get_Gravity_Field_Particles_GPU_function(int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid,
+                                                int n_cells_potential, Real dx, Real dy, Real dz, Real *potential_host,
+                                                Real *potential_dev, Real *gravity_x_dev, Real *gravity_y_dev,
+                                                Real *gravity_z_dev);
   void Get_Gravity_CIC_GPU();
-  void Get_Gravity_CIC_GPU_function( part_int_t n_local, int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, Real zMin,  Real zMax, Real dx, Real dy, Real dz,   Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev,  Real *grav_y_dev,  Real *grav_z_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev );
-  Real Calc_Particles_dt_GPU_function( int ngrid, part_int_t n_local, Real dx, Real dy, Real dz, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *dti_array_host, Real *dti_array_dev );
-  void Advance_Particles_KDK_Step1_GPU_function( part_int_t n_local, Real dt, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev  );
-  void Advance_Particles_KDK_Step1_Cosmo_GPU_function( part_int_t n_local, Real delta_a, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K  );
-  void Advance_Particles_KDK_Step2_GPU_function( part_int_t n_local, Real dt, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev  );
-  void Advance_Particles_KDK_Step2_Cosmo_GPU_function( part_int_t n_local, Real delta_a,  Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K  );
-  part_int_t Compute_Particles_GPU_Array_Size( part_int_t n );
-  int Select_Particles_to_Transfer_GPU( int direction, int side );
-  void Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, int direction, int side, Real *send_buffer, int buffer_length );
-  void Replace_Tranfered_Particles_GPU( int n_transfer );
-  void Unload_Particles_from_Buffer_GPU( int direction, int side , Real *recv_buffer_h, int n_recv );
-  void Copy_Transfer_Particles_from_Buffer_GPU(int n_recv, Real *recv_buffer_d );
-  #ifdef PRINT_MAX_MEMORY_USAGE
+  void Get_Gravity_CIC_GPU_function(part_int_t n_local, int nx_local, int ny_local, int nz_local,
+                                    int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, Real zMin,
+                                    Real zMax, Real dx, Real dy, Real dz, Real *pos_x_dev, Real *pos_y_dev,
+                                    Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev,
+                                    Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev);
+  Real Calc_Particles_dt_GPU_function(int ngrid, part_int_t n_local, Real dx, Real dy, Real dz, Real *vel_x_dev,
+                                      Real *vel_y_dev, Real *vel_z_dev, Real *dti_array_host, Real *dti_array_dev);
+  void Advance_Particles_KDK_Step1_GPU_function(part_int_t n_local, Real dt, Real *pos_x_dev, Real *pos_y_dev,
+                                                Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev,
+                                                Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev);
+  void Advance_Particles_KDK_Step1_Cosmo_GPU_function(part_int_t n_local, Real delta_a, Real *pos_x_dev,
+                                                      Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev,
+                                                      Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev,
+                                                      Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0,
+                                                      Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K);
+  void Advance_Particles_KDK_Step2_GPU_function(part_int_t n_local, Real dt, Real *vel_x_dev, Real *vel_y_dev,
+                                                Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev);
+  void Advance_Particles_KDK_Step2_Cosmo_GPU_function(part_int_t n_local, Real delta_a, Real *vel_x_dev,
+                                                      Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev,
+                                                      Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0,
+                                                      Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K);
+  part_int_t Compute_Particles_GPU_Array_Size(part_int_t n);
+  int Select_Particles_to_Transfer_GPU(int direction, int side);
+  void Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, int direction, int side, Real *send_buffer,
+                                             int buffer_length);
+  void Replace_Tranfered_Particles_GPU(int n_transfer);
+  void Unload_Particles_from_Buffer_GPU(int direction, int side, Real *recv_buffer_h, int n_recv);
+  void Copy_Transfer_Particles_from_Buffer_GPU(int n_recv, Real *recv_buffer_d);
+  void Set_Particles_Open_Boundary_GPU(int dir, int side);
+      #ifdef PRINT_MAX_MEMORY_USAGE
   void Print_Max_Memory_Usage();
-  #endif
-  
-  #endif //PARTICLES_GPU
-
+      #endif
 
+    #endif  // PARTICLES_GPU
 
   void Allocate_Memory();
 
   void Initialize_Grid_Values();
 
-  void Initialize_Sphere(struct parameters *P);
+  void Initialize_Sphere(struct Parameters *P);
 
-  void Initialize_Disk_Stellar_Clusters(struct parameters *P);
+    #if defined(PARTICLE_AGE) && !defined(SINGLE_PARTICLE_MASS) && defined(PARTICLE_IDS)
+  void Initialize_Disk_Stellar_Clusters(struct Parameters *P);
+    #endif
 
-  void Initialize_Zeldovich_Pancake( struct parameters *P );
+  void Initialize_Zeldovich_Pancake(struct Parameters *P);
 
-  void Load_Particles_Data( struct parameters *P );
+  void Load_Particles_Data(struct Parameters *P);
 
   void Free_Memory();
 
@@ -288,44 +313,44 @@ class Particles_3D
 
   void Clear_Density();
 
-  void Get_Density_CIC_Serial( );
+  void Get_Density_CIC_Serial();
 
-  #ifdef HDF5
-  void Load_Particles_Data_HDF5( hid_t file_id, int nfile, struct parameters *P );
-  #endif
+    #ifdef HDF5
+  void Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct Parameters *P);
+    #endif
 
-  #ifdef PARALLEL_OMP
-  void Get_Density_CIC_OMP( );
-  #endif
+    #ifdef PARALLEL_OMP
+  void Get_Density_CIC_OMP();
+    #endif
 
   void Get_Density_CIC();
 
-  #ifdef MPI_CHOLLA
-  void Clear_Particles_For_Transfer( void );
-  void Select_Particles_to_Transfer_All( int *flags );
-  void Add_Particle_To_Buffer( Real *buffer, part_int_t n_in_buffer, int buffer_length, Real pId, Real pMass, Real pAge,
+    #ifdef MPI_CHOLLA
+  void Clear_Particles_For_Transfer(void);
+  void Select_Particles_to_Transfer_All(int *flags);
+  void Add_Particle_To_Buffer(Real *buffer, part_int_t n_in_buffer, int buffer_length, Real pId, Real pMass, Real pAge,
                               Real pPos_x, Real pPos_y, Real pPos_z, Real pVel_x, Real pVel_y, Real pVel_z);
   void Remove_Transfered_Particles();
 
-  #ifdef PARTICLES_CPU
-  void Clear_Vectors_For_Transfers( void );
-  void Add_Particle_To_Vectors( Real pId, Real pMass, Real pAge, Real pPos_x, Real pPos_y, Real pPos_z, Real pVel_x, Real pVel_y, Real pVel_z, int *flags );
-  void Select_Particles_to_Transfer_All_CPU( int *flags );
-  void Load_Particles_to_Buffer_CPU( int direction, int side, Real *send_buffer, int buffer_length  );
-  void Unload_Particles_from_Buffer_CPU( int direction, int side, Real *recv_buffer, part_int_t n_recv,
-        Real *send_buffer_y0, Real *send_buffer_y1, Real *send_buffer_z0, Real *send_buffer_z1, int buffer_length_y0, int buffer_length_y1, int buffer_length_z0, int buffer_length_z1, int *flags);
-  #endif//PARTICLES_CPU
-
-
-  #ifdef PARTICLES_GPU
+      #ifdef PARTICLES_CPU
+  void Clear_Vectors_For_Transfers(void);
+  void Add_Particle_To_Vectors(Real pId, Real pMass, Real pAge, Real pPos_x, Real pPos_y, Real pPos_z, Real pVel_x,
+                               Real pVel_y, Real pVel_z, int *flags);
+  void Select_Particles_to_Transfer_All_CPU(int *flags);
+  void Load_Particles_to_Buffer_CPU(int direction, int side, Real *send_buffer, int buffer_length);
+  void Unload_Particles_from_Buffer_CPU(int direction, int side, Real *recv_buffer, part_int_t n_recv,
+                                        Real *send_buffer_y0, Real *send_buffer_y1, Real *send_buffer_z0,
+                                        Real *send_buffer_z1, int buffer_length_y0, int buffer_length_y1,
+                                        int buffer_length_z0, int buffer_length_z1, int *flags);
+      #endif  // PARTICLES_CPU
+
+      #ifdef PARTICLES_GPU
   void Allocate_Memory_GPU_MPI();
   void ReAllocate_Memory_GPU_MPI();
-  void Load_Particles_to_Buffer_GPU( int direction, int side, Real *send_buffer, int buffer_length  );
-  #endif //PARTICLES_GPU
-  #endif
-
+  void Load_Particles_to_Buffer_GPU(int direction, int side, Real *send_buffer, int buffer_length);
+      #endif  // PARTICLES_GPU
+    #endif
 };
 
-
-#endif //PARTICLES_H
-#endif //PARTICLES
+  #endif  // PARTICLES_H
+#endif    // PARTICLES
diff --git a/src/particles/particles_3D_gpu.cu b/src/particles/particles_3D_gpu.cu
index 6ce4bec0c..d72c9bc81 100644
--- a/src/particles/particles_3D_gpu.cu
+++ b/src/particles/particles_3D_gpu.cu
@@ -1,173 +1,199 @@
-#if defined(PARTICLES) 
+#if defined(PARTICLES)
 
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include "../utils/gpu.hpp"
-#include "../io/io.h"
-#include "../global/global.h"
-#include "../global/global_cuda.h"
-#include "../particles/particles_3D.h"
+  #include <math.h>
+  #include <stdio.h>
+  #include <stdlib.h>
+  #include <unistd.h>
 
+  #include "../global/global.h"
+  #include "../global/global_cuda.h"
+  #include "../io/io.h"
+  #include "../utils/gpu.hpp"
+  #include "particles_3D.h"
 
+void Particles3D::Free_GPU_Array_Real(Real *array) { cudaFree(array); }
 
-
-
-void Particles_3D::Free_GPU_Array_Real( Real *array ){ cudaFree(array); }
-
-
-void Particles_3D::Allocate_Particles_Grid_Field_Real( Real **array_dev, int size ){
+void Particles3D::Allocate_Particles_Grid_Field_Real(Real **array_dev, int size)
+{
   size_t global_free, global_total;
-  CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) );
+  GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total));
   #ifdef PRINT_GPU_MEMORY
-  chprintf( "Allocating GPU Memory:  %ld  MB free \n", global_free/1000000);
+  chprintf("Allocating GPU Memory:  %ld  MB free \n", global_free / 1000000);
   #endif
-  if ( global_free < size*sizeof(Real) ){
-    printf( "ERROR: Not enough global device memory \n" );
-    printf( " Available Memory: %ld  MB \n", global_free/1000000  );
-    printf( " Requested Memory: %ld  MB \n", size*sizeof(Real)/1000000  );
+  if (global_free < size * sizeof(Real)) {
+    printf("ERROR: Not enough global device memory \n");
+    printf(" Available Memory: %ld  MB \n", global_free / 1000000);
+    printf(" Requested Memory: %ld  MB \n", size * sizeof(Real) / 1000000);
     exit(-1);
   }
-  CudaSafeCall( cudaMalloc((void**)array_dev,  size*sizeof(Real)) );
+  GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(Real)));
   cudaDeviceSynchronize();
 }
 
+  #ifdef PARTICLES_GPU
 
+    #ifdef PRINT_MAX_MEMORY_USAGE
+      #include "../mpi/mpi_routines.h"
 
-#ifdef PARTICLES_GPU
-
-#ifdef PRINT_MAX_MEMORY_USAGE
-#include "../mpi/mpi_routines.h"
-
-void Particles_3D::Print_Max_Memory_Usage(){
-  
+void Particles3D::Print_Max_Memory_Usage()
+{
   size_t global_free, global_total;
-  CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) );
+  GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total));
   cudaDeviceSynchronize();
-  
+
   part_int_t n_local_max, n_total, mem_usage;
   Real fraction_max, global_free_min;
-  
-  n_local_max = (part_int_t) ReduceRealMax( (Real) n_local );
-  n_total = ReducePartIntSum( n_local );
-  fraction_max = (Real) n_local_max / (Real) n_total;
-  mem_usage = n_local_max * 9 * sizeof(Real); //Usage for pos, vel ans accel.
-  
-  global_free_min = ReduceRealMin( (Real) global_free  );
-  
-  chprintf( " Particles GPU Memory: N_local_max: %ld  (%.1f %)  mem_usage: %ld MB     global_free_min: %.1f MB  \n", n_local_max, fraction_max*100, mem_usage/1000000, global_free_min/1000000 );
-  
-  
-}
 
-#endif 
+  n_local_max  = (part_int_t)ReduceRealMax((Real)n_local);
+  n_total      = ReducePartIntSum(n_local);
+  fraction_max = (Real)n_local_max / (Real)n_total;
+  mem_usage    = n_local_max * 9 * sizeof(Real);  // Usage for pos, vel ans accel.
 
+  global_free_min = ReduceRealMin((Real)global_free);
 
+  chprintf(
+      " Particles GPU Memory: N_local_max: %ld  (%.1f %)  mem_usage: %ld MB    "
+      " global_free_min: %.1f MB  \n",
+      n_local_max, fraction_max * 100, mem_usage / 1000000, global_free_min / 1000000);
+}
 
-void Particles_3D::Free_GPU_Array_int( int *array )  { cudaFree(array); }
-void Particles_3D::Free_GPU_Array_bool( bool *array ){ cudaFree(array); }
+    #endif
 
+void Particles3D::Free_GPU_Array_int(int *array) { cudaFree(array); }
+void Particles3D::Free_GPU_Array_bool(bool *array) { cudaFree(array); }
 
-void __global__ Copy_Device_to_Device_Kernel( Real *src_array_dev, Real *dst_array_dev, part_int_t size ){
-  int tid = blockIdx.x * blockDim.x + threadIdx.x ;
-  if ( tid < size ) dst_array_dev[tid] = src_array_dev[tid];
+template <typename T>
+void __global__ Copy_Device_to_Device_Kernel(T *src_array_dev, T *dst_array_dev, part_int_t size)
+{
+  int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  if (tid < size) {
+    dst_array_dev[tid] = src_array_dev[tid];
+  }
 }
 
-void Copy_Device_to_Device( Real *src_array_dev, Real *dst_array_dev, part_int_t size ){
-  int ngrid =  (size + TPB_PARTICLES - 1) / TPB_PARTICLES;
+template <typename T>
+void Copy_Device_to_Device(T *src_array_dev, T *dst_array_dev, part_int_t size)
+{
+  int ngrid = (size - 1) / TPB_PARTICLES + 1;
   dim3 dim1dGrid(ngrid, 1, 1);
   dim3 dim1dBlock(TPB_PARTICLES, 1, 1);
-  hipLaunchKernelGGL(Copy_Device_to_Device_Kernel, dim1dGrid, dim1dBlock, 0, 0,  src_array_dev, dst_array_dev, size);
-  CudaCheckError();
-
+  hipLaunchKernelGGL(Copy_Device_to_Device_Kernel, dim1dGrid, dim1dBlock, 0, 0, src_array_dev, dst_array_dev, size);
+  GPU_Error_Check();
 }
 
-
-void Particles_3D::Allocate_Particles_GPU_Array_Real( Real **array_dev, part_int_t size ){
+void Particles3D::Allocate_Particles_GPU_Array_Real(Real **array_dev, part_int_t size)
+{
   size_t global_free, global_total;
-  CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) );
-  #ifdef PRINT_GPU_MEMORY
-  chprintf( "Allocating GPU Memory:  %ld  MB free \n", global_free/1000000);
-  #endif
-  if ( global_free < size*sizeof(Real) ){
-    printf( "ERROR: Not enough global device memory \n" );
-    printf( " Available Memory: %ld  MB \n", global_free/1000000  );
-    printf( " Requested Memory: %ld  MB \n", size*sizeof(Real)/1000000  );
+  GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total));
+    #ifdef PRINT_GPU_MEMORY
+  chprintf("Allocating GPU Memory:  %ld  MB free \n", global_free / 1000000);
+    #endif
+  if (global_free < size * sizeof(Real)) {
+    printf("ERROR: Not enough global device memory \n");
+    printf(" Available Memory: %ld  MB \n", global_free / 1000000);
+    printf(" Requested Memory: %ld  MB \n", size * sizeof(Real) / 1000000);
     exit(-1);
   }
-  CudaSafeCall( cudaMalloc((void**)array_dev,  size*sizeof(Real)) );
+  GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(Real)));
   cudaDeviceSynchronize();
 }
 
-void Particles_3D::Allocate_Particles_GPU_Array_int( int **array_dev, part_int_t size ){
+void Particles3D::Allocate_Particles_GPU_Array_int(int **array_dev, part_int_t size)
+{
   size_t global_free, global_total;
-  CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) );
-  #ifdef PRINT_GPU_MEMORY
-  chprintf( "Allocating GPU Memory:  %ld  MB free \n", global_free/1000000);
-  #endif
-  if ( global_free < size*sizeof(int) ){
-    printf( "ERROR: Not enough global device memory \n" );
-    printf( " Available Memory: %ld  MB \n", global_free/1000000  );
-    printf( " Requested Memory: %ld  MB \n", size*sizeof(int)/1000000  );
+  GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total));
+    #ifdef PRINT_GPU_MEMORY
+  chprintf("Allocating GPU Memory:  %ld  MB free \n", global_free / 1000000);
+    #endif
+  if (global_free < size * sizeof(int)) {
+    printf("ERROR: Not enough global device memory \n");
+    printf(" Available Memory: %ld  MB \n", global_free / 1000000);
+    printf(" Requested Memory: %ld  MB \n", size * sizeof(int) / 1000000);
     exit(-1);
   }
-  CudaSafeCall( cudaMalloc((void**)array_dev,  size*sizeof(int)) );
+  GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(int)));
   cudaDeviceSynchronize();
 }
 
-void Particles_3D::Allocate_Particles_GPU_Array_bool( bool **array_dev, part_int_t size ){
+void Particles3D::Allocate_Particles_GPU_Array_Part_Int(part_int_t **array_dev, part_int_t size)
+{
   size_t global_free, global_total;
-  CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) );
-  #ifdef PRINT_GPU_MEMORY
-  chprintf( "Allocating GPU Memory:  %ld  MB free \n", global_free/1000000);
-  #endif
-  if ( global_free < size*sizeof(bool) ){
-    printf( "ERROR: Not enough global device memory \n" );
-    printf( " Available Memory: %ld  MB \n", global_free/1000000  );
-    printf( " Requested Memory: %ld  MB \n", size*sizeof(bool)/1000000  );
+  GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total));
+    #ifdef PRINT_GPU_MEMORY
+  chprintf("Allocating GPU Memory:  %ld  MB free \n", global_free / 1000000);
+    #endif
+  if (global_free < size * sizeof(part_int_t)) {
+    printf("ERROR: Not enough global device memory \n");
+    printf(" Available Memory: %ld  MB \n", global_free / 1000000);
+    printf(" Requested Memory: %ld  MB \n", size * sizeof(part_int_t) / 1000000);
     exit(-1);
   }
-  CudaSafeCall( cudaMalloc((void**)array_dev,  size*sizeof(bool)) );
+  GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(part_int_t)));
   cudaDeviceSynchronize();
 }
 
-void Particles_3D::Copy_Particles_Array_Real_Host_to_Device( Real *array_host, Real *array_dev, part_int_t size){
-  CudaSafeCall( cudaMemcpy(array_dev, array_host, size*sizeof(Real), cudaMemcpyHostToDevice) );
+void Particles3D::Allocate_Particles_GPU_Array_bool(bool **array_dev, part_int_t size)
+{
+  size_t global_free, global_total;
+  GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total));
+    #ifdef PRINT_GPU_MEMORY
+  chprintf("Allocating GPU Memory:  %ld  MB free \n", global_free / 1000000);
+    #endif
+  if (global_free < size * sizeof(bool)) {
+    printf("ERROR: Not enough global device memory \n");
+    printf(" Available Memory: %ld  MB \n", global_free / 1000000);
+    printf(" Requested Memory: %ld  MB \n", size * sizeof(bool) / 1000000);
+    exit(-1);
+  }
+  GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(bool)));
   cudaDeviceSynchronize();
 }
 
-void Particles_3D::Copy_Particles_Array_Real_Device_to_Host( Real *array_dev, Real *array_host, part_int_t size){
-  CudaSafeCall( cudaMemcpy(array_host, array_dev, size*sizeof(Real), cudaMemcpyDeviceToHost) );
+void Particles3D::Copy_Particles_Array_Real_Host_to_Device(Real *array_host, Real *array_dev, part_int_t size)
+{
+  GPU_Error_Check(cudaMemcpy(array_dev, array_host, size * sizeof(Real), cudaMemcpyHostToDevice));
   cudaDeviceSynchronize();
 }
 
-
-
-__global__ void Set_Particles_Array_Real_Kernel( Real value, Real *array_dev, part_int_t size ){
-  int tid = blockIdx.x * blockDim.x + threadIdx.x ;
-  if ( tid < size ) array_dev[tid] = value;
+void Particles3D::Copy_Particles_Array_Real_Device_to_Host(Real *array_dev, Real *array_host, part_int_t size)
+{
+  GPU_Error_Check(cudaMemcpy(array_host, array_dev, size * sizeof(Real), cudaMemcpyDeviceToHost));
+  cudaDeviceSynchronize();
 }
 
+void Particles3D::Copy_Particles_Array_Int_Host_to_Device(part_int_t *array_host, part_int_t *array_dev,
+                                                          part_int_t size)
+{
+  GPU_Error_Check(cudaMemcpy(array_dev, array_host, size * sizeof(part_int_t), cudaMemcpyHostToDevice));
+  cudaDeviceSynchronize();
+}
 
+void Particles3D::Copy_Particles_Array_Int_Device_to_Host(part_int_t *array_dev, part_int_t *array_host,
+                                                          part_int_t size)
+{
+  GPU_Error_Check(cudaMemcpy(array_host, array_dev, size * sizeof(part_int_t), cudaMemcpyDeviceToHost));
+  cudaDeviceSynchronize();
+}
 
-void Particles_3D::Set_Particles_Array_Real( Real value, Real *array_dev, part_int_t size){
+__global__ void Set_Particles_Array_Real_Kernel(Real value, Real *array_dev, part_int_t size)
+{
+  int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  if (tid < size) {
+    array_dev[tid] = value;
+  }
+}
 
+void Particles3D::Set_Particles_Array_Real(Real value, Real *array_dev, part_int_t size)
+{
   // set values for GPU kernels
-  int ngrid =  (size + TPB_PARTICLES - 1) / TPB_PARTICLES;
+  int ngrid = (size - 1) / TPB_PARTICLES + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(ngrid, 1, 1);
   //  number of threads per 1D block
   dim3 dim1dBlock(TPB_PARTICLES, 1, 1);
-  hipLaunchKernelGGL(Set_Particles_Array_Real_Kernel, dim1dGrid, dim1dBlock, 0, 0,  value, array_dev, size);
-  CudaCheckError();
+  hipLaunchKernelGGL(Set_Particles_Array_Real_Kernel, dim1dGrid, dim1dBlock, 0, 0, value, array_dev, size);
+  GPU_Error_Check();
 }
 
-
-
-
-
-
-
-#endif //PARTICLES_GPU
-#endif//PARTICLES
+  #endif  // PARTICLES_GPU
+#endif    // PARTICLES
diff --git a/src/particles/particles_boundaries.cpp b/src/particles/particles_boundaries.cpp
index b03b6038d..96e4f110e 100644
--- a/src/particles/particles_boundaries.cpp
+++ b/src/particles/particles_boundaries.cpp
@@ -1,26 +1,27 @@
 #ifdef PARTICLES
 
-#include <unistd.h>
-#include <algorithm>
-#include <iostream>
-#include "../grid/grid3D.h"
-#include "../io/io.h"
-#include "../particles/particles_3D.h"
+  #include <unistd.h>
 
-#ifdef MPI_CHOLLA
-#include "../mpi/mpi_routines.h"
-#ifdef PARTICLES_GPU
-#include "../particles/particles_boundaries_gpu.h"
-#include "../utils/gpu_arrays_functions.h"
-#endif//PARTICLES_GPU
-#endif//MPI_CHOLLA
+  #include <algorithm>
+  #include <iostream>
 
+  #include "../grid/grid3D.h"
+  #include "../io/io.h"
+  #include "particles_3D.h"
 
-//Transfer the particles that moved outside the local domain
-void Grid3D::Transfer_Particles_Boundaries( struct parameters P ){
-
+  #ifdef MPI_CHOLLA
+    #include "../mpi/mpi_routines.h"
+    #ifdef PARTICLES_GPU
+      #include "../utils/gpu_arrays_functions.h"
+      #include "particles_boundaries_gpu.h"
+    #endif  // PARTICLES_GPU
+  #endif    // MPI_CHOLLA
 
-  //Transfer Particles Boundaries
+// Transfer the particles that moved outside the local domain
+void Grid3D::Transfer_Particles_Boundaries(struct Parameters P)
+{
+  GPU_Error_Check();
+  // Transfer Particles Boundaries
   Particles.TRANSFER_PARTICLES_BOUNDARIES = true;
   #ifdef CPU_TIME
   Timer.Part_Boundaries.Start();
@@ -30,572 +31,636 @@ void Grid3D::Transfer_Particles_Boundaries( struct parameters P ){
   Timer.Part_Boundaries.End();
   #endif
   Particles.TRANSFER_PARTICLES_BOUNDARIES = false;
-
+  GPU_Error_Check();
 }
 
-#ifdef MPI_CHOLLA
-//Remove the particles that were transferred outside the local domain
-void Grid3D::Finish_Particles_Transfer( void ){
-
-  #ifdef PARTICLES_CPU
+  #ifdef MPI_CHOLLA
+// Remove the particles that were transferred outside the local domain
+void Grid3D::Finish_Particles_Transfer(void)
+{
+    #ifdef PARTICLES_CPU
   Particles.Remove_Transfered_Particles();
-  #endif
-
+    #endif
 }
 
-
-//Wait for the MPI request and unload the transferred particles
+// Wait for the MPI request and unload the transferred particles
 void Grid3D::Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(int dir, int *flags)
 {
-
   int iwait;
-  int index = 0;
-  int wait_max=0;
+  int index    = 0;
+  int wait_max = 0;
   MPI_Status status;
 
-
-  //find out how many recvs we need to wait for
-  if (dir==0) {
-    if(flags[0] == 5) //there is communication on this face
-      wait_max++;   //so we'll need to wait for its comm
-    if(flags[1] == 5) //there is communication on this face
-      wait_max++;   //so we'll need to wait for its comm
+  // find out how many recvs we need to wait for
+  if (dir == 0) {
+    if (flags[0] == 5) {  // there is communication on this face
+      wait_max++;         // so we'll need to wait for its comm
+    }
+    if (flags[1] == 5) {  // there is communication on this face
+      wait_max++;         // so we'll need to wait for its comm
+    }
   }
-  if (dir==1) {
-    if(flags[2] == 5) //there is communication on this face
-      wait_max++;   //so we'll need to wait for its comm
-    if(flags[3] == 5) //there is communication on this face
-      wait_max++;   //so we'll need to wait for its comm
+  if (dir == 1) {
+    if (flags[2] == 5) {  // there is communication on this face
+      wait_max++;         // so we'll need to wait for its comm
+    }
+    if (flags[3] == 5) {  // there is communication on this face
+      wait_max++;         // so we'll need to wait for its comm
+    }
   }
-  if (dir==2) {
-    if(flags[4] == 5) //there is communication on this face
-      wait_max++;   //so we'll need to wait for its comm
-    if(flags[5] == 5) //there is communication on this face
-      wait_max++;   //so we'll need to wait for its comm
+  if (dir == 2) {
+    if (flags[4] == 5) {  // there is communication on this face
+      wait_max++;         // so we'll need to wait for its comm
+    }
+    if (flags[5] == 5) {  // there is communication on this face
+      wait_max++;         // so we'll need to wait for its comm
+    }
   }
 
-  //wait for any receives to complete
-  for(iwait=0;iwait<wait_max;iwait++)
-  {
-    //wait for recv completion
-    MPI_Waitany(wait_max,recv_request_particles_transfer,&index,&status);
-    //depending on which face arrived, load the buffer into the ghost grid
-    Unload_Particles_From_Buffers_BLOCK(status.MPI_TAG, flags );
+  // wait for any receives to complete
+  for (iwait = 0; iwait < wait_max; iwait++) {
+    // wait for recv completion
+    MPI_Waitany(wait_max, recv_request_particles_transfer, &index, &status);
+    // depending on which face arrived, load the buffer into the ghost grid
+    Unload_Particles_From_Buffers_BLOCK(status.MPI_TAG, flags);
   }
 }
 
-//Unload the particles after MPI tranfer for a single index ( axis and side )
-void Grid3D::Unload_Particles_From_Buffers_BLOCK(int index, int *flags ){
-
+// Unload the particles after MPI tranfer for a single index ( axis and side )
+void Grid3D::Unload_Particles_From_Buffers_BLOCK(int index, int *flags)
+{
   // Make sure not to unload when not transfering particles
-  if ( Particles.TRANSFER_DENSITY_BOUNDARIES ) return;
-  if ( H.TRANSFER_HYDRO_BOUNDARIES ) return;
-  if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ) return;
-
-  if( index == 0) Unload_Particles_from_Buffer_X0( flags );
-  if( index == 1) Unload_Particles_from_Buffer_X1( flags );
-  if( index == 2) Unload_Particles_from_Buffer_Y0( flags );
-  if( index == 3) Unload_Particles_from_Buffer_Y1( flags );
-  if( index == 4) Unload_Particles_from_Buffer_Z0( flags );
-  if( index == 5) Unload_Particles_from_Buffer_Z1( flags );
-}
+  if (Particles.TRANSFER_DENSITY_BOUNDARIES) {
+    return;
+  }
+  if (H.TRANSFER_HYDRO_BOUNDARIES) {
+    return;
+  }
+  if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) {
+    return;
+  }
 
+  if (index == 0) {
+    Unload_Particles_from_Buffer_X0(flags);
+  }
+  if (index == 1) {
+    Unload_Particles_from_Buffer_X1(flags);
+  }
+  if (index == 2) {
+    Unload_Particles_from_Buffer_Y0(flags);
+  }
+  if (index == 3) {
+    Unload_Particles_from_Buffer_Y1(flags);
+  }
+  if (index == 4) {
+    Unload_Particles_from_Buffer_Z0(flags);
+  }
+  if (index == 5) {
+    Unload_Particles_from_Buffer_Z1(flags);
+  }
+}
 
-//Wait for the Number of particles that will be transferred, and request the MPI_Recv to receive the MPI buffer
+// Wait for the Number of particles that will be transferred, and request the
+// MPI_Recv to receive the MPI buffer
 void Grid3D::Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(int dir, int *flags)
 {
-  #ifdef PARTICLES
-  if ( !Particles.TRANSFER_PARTICLES_BOUNDARIES ) return;
-  #endif
+    #ifdef PARTICLES
+  if (!Particles.TRANSFER_PARTICLES_BOUNDARIES) {
+    return;
+  }
+    #endif
 
   int iwait;
-  int index = 0;
-  int wait_max=0;
+  int index    = 0;
+  int wait_max = 0;
   MPI_Status status;
 
-  //find out how many recvs we need to wait for
-  if (dir==0) {
-    if(flags[0] == 5) //there is communication on this face
-      wait_max++;   //so we'll need to wait for its comm
-    if(flags[1] == 5) //there is communication on this face
-      wait_max++;   //so we'll need to wait for its comm
+  // find out how many recvs we need to wait for
+  if (dir == 0) {
+    if (flags[0] == 5) {  // there is communication on this face
+      wait_max++;         // so we'll need to wait for its comm
+    }
+    if (flags[1] == 5) {  // there is communication on this face
+      wait_max++;         // so we'll need to wait for its comm
+    }
   }
-  if (dir==1) {
-    if(flags[2] == 5) //there is communication on this face
-      wait_max++;   //so we'll need to wait for its comm
-    if(flags[3] == 5) //there is communication on this face
-      wait_max++;   //so we'll need to wait for its comm
+  if (dir == 1) {
+    if (flags[2] == 5) {  // there is communication on this face
+      wait_max++;         // so we'll need to wait for its comm
+    }
+    if (flags[3] == 5) {  // there is communication on this face
+      wait_max++;         // so we'll need to wait for its comm
+    }
   }
-  if (dir==2) {
-    if(flags[4] == 5) //there is communication on this face
-      wait_max++;   //so we'll need to wait for its comm
-    if(flags[5] == 5) //there is communication on this face
-      wait_max++;   //so we'll need to wait for its comm
+  if (dir == 2) {
+    if (flags[4] == 5) {  // there is communication on this face
+      wait_max++;         // so we'll need to wait for its comm
+    }
+    if (flags[5] == 5) {  // there is communication on this face
+      wait_max++;         // so we'll need to wait for its comm
+    }
   }
 
   int ireq_particles_transfer = 0;
-  //wait for any receives to complete
-  for(iwait=0;iwait<wait_max;iwait++)
-  {
-    //wait for recv completion
-    MPI_Waitany(wait_max,recv_request_n_particles,&index,&status);
-    //depending on which face arrived, load the buffer into the ghost grid
+  // wait for any receives to complete
+  for (iwait = 0; iwait < wait_max; iwait++) {
+    // wait for recv completion
+    MPI_Waitany(wait_max, recv_request_n_particles, &index, &status);
+    // depending on which face arrived, load the buffer into the ghost grid
     Load_NTtransfer_and_Request_Receive_Particles_Transfer(status.MPI_TAG, &ireq_particles_transfer);
   }
 }
 
-//Load the Number of particles that will be received (Particles.n_recv) and make the MPI_Irecv request for that buffer size
-void Grid3D::Load_NTtransfer_and_Request_Receive_Particles_Transfer(int index, int *ireq_particles_transfer){
-
+// Load the Number of particles that will be received (Particles.n_recv) and
+// make the MPI_Irecv request for that buffer size
+void Grid3D::Load_NTtransfer_and_Request_Receive_Particles_Transfer(int index, int *ireq_particles_transfer)
+{
   int buffer_length;
 
-  #ifdef PARTICLES_GPU
-    #ifdef MPI_GPU
-    Real *recv_buffer_x0_particles = d_recv_buffer_x0_particles;
-    Real *recv_buffer_x1_particles = d_recv_buffer_x1_particles;
-    Real *recv_buffer_y0_particles = d_recv_buffer_y0_particles;
-    Real *recv_buffer_y1_particles = d_recv_buffer_y1_particles;
-    Real *recv_buffer_z0_particles = d_recv_buffer_z0_particles;
-    Real *recv_buffer_z1_particles = d_recv_buffer_z1_particles;
-    #else
-    Real *recv_buffer_x0_particles = h_recv_buffer_x0_particles;
-    Real *recv_buffer_x1_particles = h_recv_buffer_x1_particles;
-    Real *recv_buffer_y0_particles = h_recv_buffer_y0_particles;
-    Real *recv_buffer_y1_particles = h_recv_buffer_y1_particles;
-    Real *recv_buffer_z0_particles = h_recv_buffer_z0_particles;
-    Real *recv_buffer_z1_particles = h_recv_buffer_z1_particles;
+    #ifdef PARTICLES_GPU
+      #ifdef MPI_GPU
+  Real *recv_buffer_x0_particles = d_recv_buffer_x0_particles;
+  Real *recv_buffer_x1_particles = d_recv_buffer_x1_particles;
+  Real *recv_buffer_y0_particles = d_recv_buffer_y0_particles;
+  Real *recv_buffer_y1_particles = d_recv_buffer_y1_particles;
+  Real *recv_buffer_z0_particles = d_recv_buffer_z0_particles;
+  Real *recv_buffer_z1_particles = d_recv_buffer_z1_particles;
+      #else
+  Real *recv_buffer_x0_particles = h_recv_buffer_x0_particles;
+  Real *recv_buffer_x1_particles = h_recv_buffer_x1_particles;
+  Real *recv_buffer_y0_particles = h_recv_buffer_y0_particles;
+  Real *recv_buffer_y1_particles = h_recv_buffer_y1_particles;
+  Real *recv_buffer_z0_particles = h_recv_buffer_z0_particles;
+  Real *recv_buffer_z1_particles = h_recv_buffer_z1_particles;
+      #endif
     #endif
-  #endif
 
-  #ifdef PARTICLES_CPU
+    #ifdef PARTICLES_CPU
   Real *recv_buffer_x0_particles = h_recv_buffer_x0_particles;
   Real *recv_buffer_x1_particles = h_recv_buffer_x1_particles;
   Real *recv_buffer_y0_particles = h_recv_buffer_y0_particles;
   Real *recv_buffer_y1_particles = h_recv_buffer_y1_particles;
   Real *recv_buffer_z0_particles = h_recv_buffer_z0_particles;
   Real *recv_buffer_z1_particles = h_recv_buffer_z1_particles;
-  #endif
+    #endif
 
-  if ( index == 0){
+  if (index == 0) {
     buffer_length = Particles.n_recv_x0 * N_DATA_PER_PARTICLE_TRANSFER;
     #ifdef PARTICLES_GPU
-    #ifdef MPI_GPU 
-    if ( buffer_length > Particles.G.recv_buffer_size_x0 ){
-      printf( "Extending Particles Transfer Buffer  ");
-      Extend_GPU_Array_Real( &recv_buffer_x0_particles, Particles.G.recv_buffer_size_x0, Particles.G.gpu_allocation_factor*buffer_length, true );
-      Particles.G.recv_buffer_size_x0 = (part_int_t) Particles.G.gpu_allocation_factor*buffer_length;
+      #ifdef MPI_GPU
+    if (buffer_length > Particles.G.recv_buffer_size_x0) {
+      printf("Extending Particles Transfer Buffer  ");
+      Extend_GPU_Array(&recv_buffer_x0_particles, Particles.G.recv_buffer_size_x0,
+                       Particles.G.gpu_allocation_factor * buffer_length, true);
+      Particles.G.recv_buffer_size_x0 = (part_int_t)Particles.G.gpu_allocation_factor * buffer_length;
     }
-    #else
-    Check_and_Grow_Particles_Buffer( &recv_buffer_x0_particles , &buffer_length_particles_x0_recv, buffer_length );
-    #endif
+      #else
+    Check_and_Grow_Particles_Buffer(&recv_buffer_x0_particles, &buffer_length_particles_x0_recv, buffer_length);
+      #endif
     #endif
     #ifdef PARTICLES_CPU
-    Check_and_Grow_Particles_Buffer( &recv_buffer_x0_particles , &buffer_length_particles_x0_recv, buffer_length );
+    Check_and_Grow_Particles_Buffer(&recv_buffer_x0_particles, &buffer_length_particles_x0_recv, buffer_length);
     #endif
-    // if ( Particles.n_recv_x0 > 0 ) std::cout << " Recv X0: " << Particles.n_recv_x0 << std::endl;
-    MPI_Irecv(recv_buffer_x0_particles, buffer_length, MPI_CHREAL, source[0], 0, world, &recv_request_particles_transfer[*ireq_particles_transfer]);
+    // if ( Particles.n_recv_x0 > 0 ) std::cout << " Recv X0: " <<
+    // Particles.n_recv_x0 << std::endl;
+    MPI_Irecv(recv_buffer_x0_particles, buffer_length, MPI_CHREAL, source[0], 0, world,
+              &recv_request_particles_transfer[*ireq_particles_transfer]);
   }
-  if ( index == 1){
+  if (index == 1) {
     buffer_length = Particles.n_recv_x1 * N_DATA_PER_PARTICLE_TRANSFER;
     #ifdef PARTICLES_GPU
-    #ifdef MPI_GPU 
-    if ( buffer_length > Particles.G.recv_buffer_size_x1 ){
-      printf( "Extending Particles Transfer Buffer  ");
-      Extend_GPU_Array_Real( &recv_buffer_x1_particles, Particles.G.recv_buffer_size_x1, Particles.G.gpu_allocation_factor*buffer_length, true  );
-      Particles.G.recv_buffer_size_x1 = (part_int_t) Particles.G.gpu_allocation_factor*buffer_length;
+      #ifdef MPI_GPU
+    if (buffer_length > Particles.G.recv_buffer_size_x1) {
+      printf("Extending Particles Transfer Buffer  ");
+      Extend_GPU_Array(&recv_buffer_x1_particles, Particles.G.recv_buffer_size_x1,
+                       Particles.G.gpu_allocation_factor * buffer_length, true);
+      Particles.G.recv_buffer_size_x1 = (part_int_t)Particles.G.gpu_allocation_factor * buffer_length;
     }
-    #else
-    Check_and_Grow_Particles_Buffer( &recv_buffer_x1_particles , &buffer_length_particles_x1_recv, buffer_length );
-    #endif
+      #else
+    Check_and_Grow_Particles_Buffer(&recv_buffer_x1_particles, &buffer_length_particles_x1_recv, buffer_length);
+      #endif
     #endif
     #ifdef PARTICLES_CPU
-    Check_and_Grow_Particles_Buffer( &recv_buffer_x1_particles , &buffer_length_particles_x1_recv, buffer_length );
+    Check_and_Grow_Particles_Buffer(&recv_buffer_x1_particles, &buffer_length_particles_x1_recv, buffer_length);
     #endif
-    // if ( Particles.n_recv_x1 > 0 ) if ( Particles.n_recv_x1 > 0 ) std::cout << " Recv X1:  " << Particles.n_recv_x1 <<  "  " << procID <<  "  from "  <<  source[1] <<  std::endl;
-    MPI_Irecv(recv_buffer_x1_particles, buffer_length, MPI_CHREAL, source[1], 1, world, &recv_request_particles_transfer[*ireq_particles_transfer]);
+    // if ( Particles.n_recv_x1 > 0 ) if ( Particles.n_recv_x1 > 0 ) std::cout
+    // << " Recv X1:  " << Particles.n_recv_x1 <<  "  " << procID <<  "  from "
+    // <<  source[1] <<  std::endl;
+    MPI_Irecv(recv_buffer_x1_particles, buffer_length, MPI_CHREAL, source[1], 1, world,
+              &recv_request_particles_transfer[*ireq_particles_transfer]);
   }
-  if ( index == 2){
+  if (index == 2) {
     buffer_length = Particles.n_recv_y0 * N_DATA_PER_PARTICLE_TRANSFER;
     #ifdef PARTICLES_GPU
-    #ifdef MPI_GPU 
-    if ( buffer_length > Particles.G.recv_buffer_size_y0 ){
-      printf( "Extending Particles Transfer Buffer  ");
-      Extend_GPU_Array_Real( &recv_buffer_y0_particles, Particles.G.recv_buffer_size_y0, Particles.G.gpu_allocation_factor*buffer_length, true  );
-      Particles.G.recv_buffer_size_y0 = (part_int_t) Particles.G.gpu_allocation_factor*buffer_length;
+      #ifdef MPI_GPU
+    if (buffer_length > Particles.G.recv_buffer_size_y0) {
+      printf("Extending Particles Transfer Buffer  ");
+      Extend_GPU_Array(&recv_buffer_y0_particles, Particles.G.recv_buffer_size_y0,
+                       Particles.G.gpu_allocation_factor * buffer_length, true);
+      Particles.G.recv_buffer_size_y0 = (part_int_t)Particles.G.gpu_allocation_factor * buffer_length;
     }
-    #else
-    Check_and_Grow_Particles_Buffer( &recv_buffer_y0_particles , &buffer_length_particles_y0_recv, buffer_length );
-    #endif
+      #else
+    Check_and_Grow_Particles_Buffer(&recv_buffer_y0_particles, &buffer_length_particles_y0_recv, buffer_length);
+      #endif
     #endif
     #ifdef PARTICLES_CPU
-    Check_and_Grow_Particles_Buffer( &recv_buffer_y0_particles , &buffer_length_particles_y0_recv, buffer_length );
+    Check_and_Grow_Particles_Buffer(&recv_buffer_y0_particles, &buffer_length_particles_y0_recv, buffer_length);
     #endif
-    // if ( Particles.n_recv_y0 > 0 ) std::cout << " Recv Y0: " << Particles.n_recv_y0 << std::endl;
-    MPI_Irecv(recv_buffer_y0_particles, buffer_length, MPI_CHREAL, source[2], 2, world, &recv_request_particles_transfer[*ireq_particles_transfer]);
+    // if ( Particles.n_recv_y0 > 0 ) std::cout << " Recv Y0: " <<
+    // Particles.n_recv_y0 << std::endl;
+    MPI_Irecv(recv_buffer_y0_particles, buffer_length, MPI_CHREAL, source[2], 2, world,
+              &recv_request_particles_transfer[*ireq_particles_transfer]);
   }
-  if ( index == 3){
+  if (index == 3) {
     buffer_length = Particles.n_recv_y1 * N_DATA_PER_PARTICLE_TRANSFER;
     #ifdef PARTICLES_GPU
-    #ifdef MPI_GPU
-    if ( buffer_length > Particles.G.recv_buffer_size_y1 ){ 
-      printf( "Extending Particles Transfer Buffer  ");
-      Extend_GPU_Array_Real( &recv_buffer_y1_particles, Particles.G.recv_buffer_size_y1, Particles.G.gpu_allocation_factor*buffer_length, true  );
-      Particles.G.recv_buffer_size_y1 = (part_int_t) Particles.G.gpu_allocation_factor*buffer_length;
+      #ifdef MPI_GPU
+    if (buffer_length > Particles.G.recv_buffer_size_y1) {
+      printf("Extending Particles Transfer Buffer  ");
+      Extend_GPU_Array(&recv_buffer_y1_particles, Particles.G.recv_buffer_size_y1,
+                       Particles.G.gpu_allocation_factor * buffer_length, true);
+      Particles.G.recv_buffer_size_y1 = (part_int_t)Particles.G.gpu_allocation_factor * buffer_length;
     }
-    #else
-    Check_and_Grow_Particles_Buffer( &recv_buffer_y1_particles , &buffer_length_particles_y1_recv, buffer_length );
-    #endif
+      #else
+    Check_and_Grow_Particles_Buffer(&recv_buffer_y1_particles, &buffer_length_particles_y1_recv, buffer_length);
+      #endif
     #endif
     #ifdef PARTICLES_CPU
-    Check_and_Grow_Particles_Buffer( &recv_buffer_y1_particles , &buffer_length_particles_y1_recv, buffer_length );
+    Check_and_Grow_Particles_Buffer(&recv_buffer_y1_particles, &buffer_length_particles_y1_recv, buffer_length);
     #endif
-    // if ( Particles.n_recv_y1 > 0 ) std::cout << " Recv Y1: " << Particles.n_recv_y1 << std::endl;
-    MPI_Irecv(recv_buffer_y1_particles, buffer_length, MPI_CHREAL, source[3], 3, world, &recv_request_particles_transfer[*ireq_particles_transfer]);
+    // if ( Particles.n_recv_y1 > 0 ) std::cout << " Recv Y1: " <<
+    // Particles.n_recv_y1 << std::endl;
+    MPI_Irecv(recv_buffer_y1_particles, buffer_length, MPI_CHREAL, source[3], 3, world,
+              &recv_request_particles_transfer[*ireq_particles_transfer]);
   }
-  if ( index == 4){
+  if (index == 4) {
     buffer_length = Particles.n_recv_z0 * N_DATA_PER_PARTICLE_TRANSFER;
     #ifdef PARTICLES_GPU
-    #ifdef MPI_GPU 
-    if ( buffer_length > Particles.G.recv_buffer_size_z0 ){
-      printf( "Extending Particles Transfer Buffer  ");
-      Extend_GPU_Array_Real( &recv_buffer_z0_particles, Particles.G.recv_buffer_size_z0, Particles.G.gpu_allocation_factor*buffer_length, true  );
-      Particles.G.recv_buffer_size_z0 = (part_int_t) Particles.G.gpu_allocation_factor*buffer_length;
+      #ifdef MPI_GPU
+    if (buffer_length > Particles.G.recv_buffer_size_z0) {
+      printf("Extending Particles Transfer Buffer  ");
+      Extend_GPU_Array(&recv_buffer_z0_particles, Particles.G.recv_buffer_size_z0,
+                       Particles.G.gpu_allocation_factor * buffer_length, true);
+      Particles.G.recv_buffer_size_z0 = (part_int_t)Particles.G.gpu_allocation_factor * buffer_length;
     }
-    #else
-    Check_and_Grow_Particles_Buffer( &recv_buffer_z0_particles , &buffer_length_particles_z0_recv, buffer_length );
-    #endif
+      #else
+    Check_and_Grow_Particles_Buffer(&recv_buffer_z0_particles, &buffer_length_particles_z0_recv, buffer_length);
+      #endif
     #endif
     #ifdef PARTICLES_CPU
-    Check_and_Grow_Particles_Buffer( &recv_buffer_z0_particles , &buffer_length_particles_z0_recv, buffer_length );
+    Check_and_Grow_Particles_Buffer(&recv_buffer_z0_particles, &buffer_length_particles_z0_recv, buffer_length);
     #endif
-    // if ( Particles.n_recv_z0 > 0 ) std::cout << " Recv Z0: " << Particles.n_recv_z0 << std::endl;
-    MPI_Irecv(recv_buffer_z0_particles, buffer_length, MPI_CHREAL, source[4], 4, world, &recv_request_particles_transfer[*ireq_particles_transfer]);
+    // if ( Particles.n_recv_z0 > 0 ) std::cout << " Recv Z0: " <<
+    // Particles.n_recv_z0 << std::endl;
+    MPI_Irecv(recv_buffer_z0_particles, buffer_length, MPI_CHREAL, source[4], 4, world,
+              &recv_request_particles_transfer[*ireq_particles_transfer]);
   }
-  if ( index == 5){
+  if (index == 5) {
     buffer_length = Particles.n_recv_z1 * N_DATA_PER_PARTICLE_TRANSFER;
     #ifdef PARTICLES_GPU
-    #ifdef MPI_GPU 
-    if ( buffer_length > Particles.G.recv_buffer_size_z1 ){
-      printf( "Extending Particles Transfer Buffer  ");
-      Extend_GPU_Array_Real( &recv_buffer_z1_particles, Particles.G.recv_buffer_size_z1, Particles.G.gpu_allocation_factor*buffer_length, true  );
-      Particles.G.recv_buffer_size_z1 = (part_int_t) Particles.G.gpu_allocation_factor*buffer_length;
+      #ifdef MPI_GPU
+    if (buffer_length > Particles.G.recv_buffer_size_z1) {
+      printf("Extending Particles Transfer Buffer  ");
+      Extend_GPU_Array(&recv_buffer_z1_particles, Particles.G.recv_buffer_size_z1,
+                       Particles.G.gpu_allocation_factor * buffer_length, true);
+      Particles.G.recv_buffer_size_z1 = (part_int_t)Particles.G.gpu_allocation_factor * buffer_length;
     }
-    #else
-    Check_and_Grow_Particles_Buffer( &recv_buffer_z1_particles , &buffer_length_particles_z1_recv, buffer_length );
-    #endif
+      #else
+    Check_and_Grow_Particles_Buffer(&recv_buffer_z1_particles, &buffer_length_particles_z1_recv, buffer_length);
+      #endif
     #endif
     #ifdef PARTICLES_CPU
-    Check_and_Grow_Particles_Buffer( &recv_buffer_z1_particles , &buffer_length_particles_z1_recv, buffer_length );
+    Check_and_Grow_Particles_Buffer(&recv_buffer_z1_particles, &buffer_length_particles_z1_recv, buffer_length);
     #endif
-    // if ( Particles.n_recv_z1 >0 ) std::cout << " Recv Z1: " << Particles.n_recv_z1 << std::endl;
-    MPI_Irecv(recv_buffer_z1_particles, buffer_length, MPI_CHREAL, source[5], 5, world, &recv_request_particles_transfer[*ireq_particles_transfer]);
+    // if ( Particles.n_recv_z1 >0 ) std::cout << " Recv Z1: " <<
+    // Particles.n_recv_z1 << std::endl;
+    MPI_Irecv(recv_buffer_z1_particles, buffer_length, MPI_CHREAL, source[5], 5, world,
+              &recv_request_particles_transfer[*ireq_particles_transfer]);
   }
 
   *ireq_particles_transfer += 1;
 }
 
-
-//Make Send and Receive request for the number of particles that will be transferred, and then load and send the transfer particles
-void Grid3D::Load_and_Send_Particles_X0( int ireq_n_particles, int ireq_particles_transfer ){
+// Make Send and Receive request for the number of particles that will be
+// transferred, and then load and send the transfer particles
+void Grid3D::Load_and_Send_Particles_X0(int ireq_n_particles, int ireq_particles_transfer)
+{
   int buffer_length;
   Real *send_buffer_x0_particles;
 
-  #ifdef PARTICLES_GPU
+    #ifdef PARTICLES_GPU
   send_buffer_x0_particles = d_send_buffer_x0_particles;
-  Particles.Load_Particles_to_Buffer_GPU(0, 0, send_buffer_x0_particles,  buffer_length_particles_x0_send );
-  #endif //PARTICLES_GPU
+  Particles.Load_Particles_to_Buffer_GPU(0, 0, send_buffer_x0_particles, buffer_length_particles_x0_send);
+    #endif  // PARTICLES_GPU
 
   MPI_Irecv(&Particles.n_recv_x0, 1, MPI_PART_INT, source[0], 0, world, &recv_request_n_particles[ireq_n_particles]);
-  MPI_Isend(&Particles.n_send_x0, 1, MPI_PART_INT, dest[0],   1, world, &send_request_n_particles[0]);
+  MPI_Isend(&Particles.n_send_x0, 1, MPI_PART_INT, dest[0], 1, world, &send_request_n_particles[0]);
   MPI_Request_free(send_request_n_particles);
-  // if ( Particles.n_send_x0 > 0 )   if ( Particles.n_send_x0 > 0 ) std::cout << " Sent X0:  " << Particles.n_send_x0 <<  "  " << procID <<  "  to  "  <<  dest[0] <<  std::endl;
+  // if ( Particles.n_send_x0 > 0 )   if ( Particles.n_send_x0 > 0 ) std::cout
+  // << " Sent X0:  " << Particles.n_send_x0 <<  "  " << procID <<  "  to  "  <<
+  // dest[0] <<  std::endl;
   buffer_length = Particles.n_send_x0 * N_DATA_PER_PARTICLE_TRANSFER;
-  #ifdef PARTICLES_CPU
+    #ifdef PARTICLES_CPU
   send_buffer_x0_particles = h_send_buffer_x0_particles;
-  Check_and_Grow_Particles_Buffer( &send_buffer_x0_particles , &buffer_length_particles_x0_send, buffer_length );
-  Particles.Load_Particles_to_Buffer_CPU( 0, 0, send_buffer_x0_particles,  buffer_length_particles_x0_send );
-  #endif //PARTICLES_CPU
+  Check_and_Grow_Particles_Buffer(&send_buffer_x0_particles, &buffer_length_particles_x0_send, buffer_length);
+  Particles.Load_Particles_to_Buffer_CPU(0, 0, send_buffer_x0_particles, buffer_length_particles_x0_send);
+    #endif  // PARTICLES_CPU
 
-  #if defined(PARTICLES_GPU) && !defined(MPI_GPU)
-  cudaMemcpy(h_send_buffer_x0_particles, d_send_buffer_x0_particles,
-             buffer_length*sizeof(Real), cudaMemcpyDeviceToHost);
+    #if defined(PARTICLES_GPU) && !defined(MPI_GPU)
+  cudaMemcpy(h_send_buffer_x0_particles, d_send_buffer_x0_particles, buffer_length * sizeof(Real),
+             cudaMemcpyDeviceToHost);
   send_buffer_x0_particles = h_send_buffer_x0_particles;
-  #endif
+    #endif
 
-  MPI_Isend(send_buffer_x0_particles, buffer_length, MPI_CHREAL, dest[0],   1, world, &send_request_particles_transfer[ireq_particles_transfer]);
-  MPI_Request_free(send_request_particles_transfer+ireq_particles_transfer);
+  MPI_Isend(send_buffer_x0_particles, buffer_length, MPI_CHREAL, dest[0], 1, world,
+            &send_request_particles_transfer[ireq_particles_transfer]);
+  MPI_Request_free(send_request_particles_transfer + ireq_particles_transfer);
 }
 
-void Grid3D::Load_and_Send_Particles_X1( int ireq_n_particles, int ireq_particles_transfer ){
+void Grid3D::Load_and_Send_Particles_X1(int ireq_n_particles, int ireq_particles_transfer)
+{
   int buffer_length;
   Real *send_buffer_x1_particles;
 
-  #ifdef PARTICLES_GPU
+    #ifdef PARTICLES_GPU
   send_buffer_x1_particles = d_send_buffer_x1_particles;
-  Particles.Load_Particles_to_Buffer_GPU(0, 1, send_buffer_x1_particles,  buffer_length_particles_x1_send );
-  #endif //PARTICLES_GPU
+  Particles.Load_Particles_to_Buffer_GPU(0, 1, send_buffer_x1_particles, buffer_length_particles_x1_send);
+    #endif  // PARTICLES_GPU
 
   MPI_Irecv(&Particles.n_recv_x1, 1, MPI_PART_INT, source[1], 1, world, &recv_request_n_particles[ireq_n_particles]);
-  MPI_Isend(&Particles.n_send_x1, 1, MPI_PART_INT, dest[1],   0, world, &send_request_n_particles[1]);
-  MPI_Request_free(send_request_n_particles+1);
-  // if ( Particles.n_send_x1 > 0 )  std::cout << " Sent X1: " << Particles.n_send_x1 << std::endl;
+  MPI_Isend(&Particles.n_send_x1, 1, MPI_PART_INT, dest[1], 0, world, &send_request_n_particles[1]);
+  MPI_Request_free(send_request_n_particles + 1);
+  // if ( Particles.n_send_x1 > 0 )  std::cout << " Sent X1: " <<
+  // Particles.n_send_x1 << std::endl;
   buffer_length = Particles.n_send_x1 * N_DATA_PER_PARTICLE_TRANSFER;
-  #ifdef PARTICLES_CPU
+    #ifdef PARTICLES_CPU
   send_buffer_x1_particles = h_send_buffer_x1_particles;
-  Check_and_Grow_Particles_Buffer( &send_buffer_x1_particles , &buffer_length_particles_x1_send, buffer_length );
-  Particles.Load_Particles_to_Buffer_CPU( 0, 1, send_buffer_x1_particles,  buffer_length_particles_x1_send  );
-  #endif //PARTICLES_CPU
+  Check_and_Grow_Particles_Buffer(&send_buffer_x1_particles, &buffer_length_particles_x1_send, buffer_length);
+  Particles.Load_Particles_to_Buffer_CPU(0, 1, send_buffer_x1_particles, buffer_length_particles_x1_send);
+    #endif  // PARTICLES_CPU
 
-  #if defined(PARTICLES_GPU) && !defined(MPI_GPU)
-  cudaMemcpy(h_send_buffer_x1_particles, d_send_buffer_x1_particles,
-             buffer_length*sizeof(Real), cudaMemcpyDeviceToHost);
+    #if defined(PARTICLES_GPU) && !defined(MPI_GPU)
+  cudaMemcpy(h_send_buffer_x1_particles, d_send_buffer_x1_particles, buffer_length * sizeof(Real),
+             cudaMemcpyDeviceToHost);
   send_buffer_x1_particles = h_send_buffer_x1_particles;
-  #endif
+    #endif
 
-  MPI_Isend(send_buffer_x1_particles, buffer_length, MPI_CHREAL, dest[1],   0, world, &send_request_particles_transfer[ireq_particles_transfer]);\
-  MPI_Request_free(send_request_particles_transfer+ireq_particles_transfer);
+  MPI_Isend(send_buffer_x1_particles, buffer_length, MPI_CHREAL, dest[1], 0, world,
+            &send_request_particles_transfer[ireq_particles_transfer]);
+  MPI_Request_free(send_request_particles_transfer + ireq_particles_transfer);
 }
 
-void Grid3D::Load_and_Send_Particles_Y0( int ireq_n_particles, int ireq_particles_transfer ){
+void Grid3D::Load_and_Send_Particles_Y0(int ireq_n_particles, int ireq_particles_transfer)
+{
   int buffer_length;
   Real *send_buffer_y0_particles;
 
-  #ifdef PARTICLES_GPU
+    #ifdef PARTICLES_GPU
   send_buffer_y0_particles = d_send_buffer_y0_particles;
-  Particles.Load_Particles_to_Buffer_GPU(1, 0, send_buffer_y0_particles,  buffer_length_particles_y0_send );
-  #endif //PARTICLES_GPU
+  Particles.Load_Particles_to_Buffer_GPU(1, 0, send_buffer_y0_particles, buffer_length_particles_y0_send);
+    #endif  // PARTICLES_GPU
 
-  MPI_Isend(&Particles.n_send_y0, 1, MPI_PART_INT, dest[2],   3, world, &send_request_n_particles[0]);
+  MPI_Isend(&Particles.n_send_y0, 1, MPI_PART_INT, dest[2], 3, world, &send_request_n_particles[0]);
   MPI_Request_free(send_request_n_particles);
   MPI_Irecv(&Particles.n_recv_y0, 1, MPI_PART_INT, source[2], 2, world, &recv_request_n_particles[ireq_n_particles]);
-  // if ( Particles.n_send_y0 > 0 )   std::cout << " Sent Y0: " << Particles.n_send_y0 << std::endl;
+  // if ( Particles.n_send_y0 > 0 )   std::cout << " Sent Y0: " <<
+  // Particles.n_send_y0 << std::endl;
   buffer_length = Particles.n_send_y0 * N_DATA_PER_PARTICLE_TRANSFER;
-  #ifdef PARTICLES_CPU
+    #ifdef PARTICLES_CPU
   send_buffer_y0_particles = h_send_buffer_y0_particles;
-  Check_and_Grow_Particles_Buffer( &send_buffer_y0_particles , &buffer_length_particles_y0_send, buffer_length );
-  Particles.Load_Particles_to_Buffer_CPU( 1, 0, send_buffer_y0_particles,  buffer_length_particles_y0_send  );
-  #endif //PARTICLES_CPU
+  Check_and_Grow_Particles_Buffer(&send_buffer_y0_particles, &buffer_length_particles_y0_send, buffer_length);
+  Particles.Load_Particles_to_Buffer_CPU(1, 0, send_buffer_y0_particles, buffer_length_particles_y0_send);
+    #endif  // PARTICLES_CPU
 
-  #if defined(PARTICLES_GPU) && !defined(MPI_GPU)
-  cudaMemcpy(h_send_buffer_y0_particles, d_send_buffer_y0_particles,
-             buffer_length*sizeof(Real), cudaMemcpyDeviceToHost);
+    #if defined(PARTICLES_GPU) && !defined(MPI_GPU)
+  cudaMemcpy(h_send_buffer_y0_particles, d_send_buffer_y0_particles, buffer_length * sizeof(Real),
+             cudaMemcpyDeviceToHost);
   send_buffer_y0_particles = h_send_buffer_y0_particles;
-  #endif
+    #endif
 
-  MPI_Isend(send_buffer_y0_particles, buffer_length, MPI_CHREAL, dest[2],   3, world, &send_request_particles_transfer[ireq_particles_transfer]);
-  MPI_Request_free(send_request_particles_transfer+ireq_particles_transfer);
+  MPI_Isend(send_buffer_y0_particles, buffer_length, MPI_CHREAL, dest[2], 3, world,
+            &send_request_particles_transfer[ireq_particles_transfer]);
+  MPI_Request_free(send_request_particles_transfer + ireq_particles_transfer);
 }
 
-void Grid3D::Load_and_Send_Particles_Y1( int ireq_n_particles, int ireq_particles_transfer ){
+void Grid3D::Load_and_Send_Particles_Y1(int ireq_n_particles, int ireq_particles_transfer)
+{
   int buffer_length;
   Real *send_buffer_y1_particles;
 
-  #ifdef PARTICLES_GPU
+    #ifdef PARTICLES_GPU
   send_buffer_y1_particles = d_send_buffer_y1_particles;
-  Particles.Load_Particles_to_Buffer_GPU(1, 1, send_buffer_y1_particles,  buffer_length_particles_y1_send );
-  #endif //PARTICLES_GPU
+  Particles.Load_Particles_to_Buffer_GPU(1, 1, send_buffer_y1_particles, buffer_length_particles_y1_send);
+    #endif  // PARTICLES_GPU
 
-  MPI_Isend(&Particles.n_send_y1, 1, MPI_PART_INT, dest[3],   2, world, &send_request_n_particles[1]);
-  MPI_Request_free(send_request_n_particles+1);
+  MPI_Isend(&Particles.n_send_y1, 1, MPI_PART_INT, dest[3], 2, world, &send_request_n_particles[1]);
+  MPI_Request_free(send_request_n_particles + 1);
   MPI_Irecv(&Particles.n_recv_y1, 1, MPI_PART_INT, source[3], 3, world, &recv_request_n_particles[ireq_n_particles]);
-  // if ( Particles.n_send_y1 > 0 )  std::cout << " Sent Y1: " << Particles.n_send_y1 << std::endl;
+  // if ( Particles.n_send_y1 > 0 )  std::cout << " Sent Y1: " <<
+  // Particles.n_send_y1 << std::endl;
   buffer_length = Particles.n_send_y1 * N_DATA_PER_PARTICLE_TRANSFER;
-  #ifdef PARTICLES_CPU
+    #ifdef PARTICLES_CPU
   send_buffer_y1_particles = h_send_buffer_y1_particles;
-  Check_and_Grow_Particles_Buffer( &send_buffer_y1_particles , &buffer_length_particles_y1_send, buffer_length );
-  Particles.Load_Particles_to_Buffer_CPU( 1, 1, send_buffer_y1_particles,  buffer_length_particles_y1_send  );
-  #endif //PARTICLES_CPU
+  Check_and_Grow_Particles_Buffer(&send_buffer_y1_particles, &buffer_length_particles_y1_send, buffer_length);
+  Particles.Load_Particles_to_Buffer_CPU(1, 1, send_buffer_y1_particles, buffer_length_particles_y1_send);
+    #endif  // PARTICLES_CPU
 
-  #if defined(PARTICLES_GPU) && !defined(MPI_GPU)
-  cudaMemcpy(h_send_buffer_y1_particles, d_send_buffer_y1_particles,
-             buffer_length*sizeof(Real), cudaMemcpyDeviceToHost);
+    #if defined(PARTICLES_GPU) && !defined(MPI_GPU)
+  cudaMemcpy(h_send_buffer_y1_particles, d_send_buffer_y1_particles, buffer_length * sizeof(Real),
+             cudaMemcpyDeviceToHost);
   send_buffer_y1_particles = h_send_buffer_y1_particles;
-  #endif
+    #endif
 
-  MPI_Isend(send_buffer_y1_particles, buffer_length, MPI_CHREAL, dest[3],   2, world, &send_request_particles_transfer[ireq_particles_transfer]);
-  MPI_Request_free(send_request_particles_transfer+ireq_particles_transfer);
+  MPI_Isend(send_buffer_y1_particles, buffer_length, MPI_CHREAL, dest[3], 2, world,
+            &send_request_particles_transfer[ireq_particles_transfer]);
+  MPI_Request_free(send_request_particles_transfer + ireq_particles_transfer);
 }
 
-void Grid3D::Load_and_Send_Particles_Z0( int ireq_n_particles, int ireq_particles_transfer ){
+void Grid3D::Load_and_Send_Particles_Z0(int ireq_n_particles, int ireq_particles_transfer)
+{
   int buffer_length;
   Real *send_buffer_z0_particles;
 
-  #ifdef PARTICLES_GPU
+    #ifdef PARTICLES_GPU
   send_buffer_z0_particles = d_send_buffer_z0_particles;
-  Particles.Load_Particles_to_Buffer_GPU(2, 0, send_buffer_z0_particles,  buffer_length_particles_z0_send );
-  #endif //PARTICLES_GPU
+  Particles.Load_Particles_to_Buffer_GPU(2, 0, send_buffer_z0_particles, buffer_length_particles_z0_send);
+    #endif  // PARTICLES_GPU
 
-  MPI_Isend(&Particles.n_send_z0, 1, MPI_PART_INT, dest[4],   5, world, &send_request_n_particles[0]);
+  MPI_Isend(&Particles.n_send_z0, 1, MPI_PART_INT, dest[4], 5, world, &send_request_n_particles[0]);
   MPI_Request_free(send_request_n_particles);
   MPI_Irecv(&Particles.n_recv_z0, 1, MPI_PART_INT, source[4], 4, world, &recv_request_n_particles[ireq_n_particles]);
-  // if ( Particles.n_send_z0 > 0 )   std::cout << " Sent Z0: " << Particles.n_send_z0 << std::endl;
+  // if ( Particles.n_send_z0 > 0 )   std::cout << " Sent Z0: " <<
+  // Particles.n_send_z0 << std::endl;
   buffer_length = Particles.n_send_z0 * N_DATA_PER_PARTICLE_TRANSFER;
-  #ifdef PARTICLES_CPU
+    #ifdef PARTICLES_CPU
   send_buffer_z0_particles = h_send_buffer_z0_particles;
-  Check_and_Grow_Particles_Buffer( &send_buffer_z0_particles , &buffer_length_particles_z0_send, buffer_length );
-  Particles.Load_Particles_to_Buffer_CPU( 2, 0, send_buffer_z0_particles,  buffer_length_particles_z0_send  );
-  #endif //PARTICLES_CPU
+  Check_and_Grow_Particles_Buffer(&send_buffer_z0_particles, &buffer_length_particles_z0_send, buffer_length);
+  Particles.Load_Particles_to_Buffer_CPU(2, 0, send_buffer_z0_particles, buffer_length_particles_z0_send);
+    #endif  // PARTICLES_CPU
 
-  #if defined(PARTICLES_GPU) && !defined(MPI_GPU)
-  cudaMemcpy(h_send_buffer_z0_particles, d_send_buffer_z0_particles,
-             buffer_length*sizeof(Real), cudaMemcpyDeviceToHost);
+    #if defined(PARTICLES_GPU) && !defined(MPI_GPU)
+  cudaMemcpy(h_send_buffer_z0_particles, d_send_buffer_z0_particles, buffer_length * sizeof(Real),
+             cudaMemcpyDeviceToHost);
   send_buffer_z0_particles = h_send_buffer_z0_particles;
-  #endif
+    #endif
 
-  MPI_Isend(send_buffer_z0_particles, buffer_length, MPI_CHREAL, dest[4],   5, world, &send_request_particles_transfer[ireq_particles_transfer]);
-  MPI_Request_free(send_request_particles_transfer+ireq_particles_transfer);
+  MPI_Isend(send_buffer_z0_particles, buffer_length, MPI_CHREAL, dest[4], 5, world,
+            &send_request_particles_transfer[ireq_particles_transfer]);
+  MPI_Request_free(send_request_particles_transfer + ireq_particles_transfer);
 }
 
-void Grid3D::Load_and_Send_Particles_Z1( int ireq_n_particles, int ireq_particles_transfer ){
+void Grid3D::Load_and_Send_Particles_Z1(int ireq_n_particles, int ireq_particles_transfer)
+{
   int buffer_length;
   Real *send_buffer_z1_particles;
 
-  #ifdef PARTICLES_GPU
+    #ifdef PARTICLES_GPU
   send_buffer_z1_particles = d_send_buffer_z1_particles;
-  Particles.Load_Particles_to_Buffer_GPU(2, 1, send_buffer_z1_particles,  buffer_length_particles_z1_send );
-  #endif //PARTICLES_GPU
+  Particles.Load_Particles_to_Buffer_GPU(2, 1, send_buffer_z1_particles, buffer_length_particles_z1_send);
+    #endif  // PARTICLES_GPU
 
-  MPI_Isend(&Particles.n_send_z1, 1, MPI_PART_INT, dest[5],   4, world, &send_request_n_particles[1]);
-  MPI_Request_free(send_request_n_particles+1);
+  MPI_Isend(&Particles.n_send_z1, 1, MPI_PART_INT, dest[5], 4, world, &send_request_n_particles[1]);
+  MPI_Request_free(send_request_n_particles + 1);
   MPI_Irecv(&Particles.n_recv_z1, 1, MPI_PART_INT, source[5], 5, world, &recv_request_n_particles[ireq_n_particles]);
-  // if ( Particles.n_send_z1 > 0 )   std::cout << " Sent Z1: " << Particles.n_send_z1 << std::endl;
+  // if ( Particles.n_send_z1 > 0 )   std::cout << " Sent Z1: " <<
+  // Particles.n_send_z1 << std::endl;
   buffer_length = Particles.n_send_z1 * N_DATA_PER_PARTICLE_TRANSFER;
-  #ifdef PARTICLES_CPU
+    #ifdef PARTICLES_CPU
   send_buffer_z1_particles = h_send_buffer_z1_particles;
-  Check_and_Grow_Particles_Buffer( &send_buffer_z1_particles , &buffer_length_particles_z1_send, buffer_length );
-  Particles.Load_Particles_to_Buffer_CPU( 2, 1, send_buffer_z1_particles,  buffer_length_particles_z1_send  );
-  #endif //PARTICLES_CPU
+  Check_and_Grow_Particles_Buffer(&send_buffer_z1_particles, &buffer_length_particles_z1_send, buffer_length);
+  Particles.Load_Particles_to_Buffer_CPU(2, 1, send_buffer_z1_particles, buffer_length_particles_z1_send);
+    #endif  // PARTICLES_CPU
 
-  #if defined(PARTICLES_GPU) && !defined(MPI_GPU)
-  cudaMemcpy(h_send_buffer_z1_particles, d_send_buffer_z1_particles,
-             buffer_length*sizeof(Real), cudaMemcpyDeviceToHost);
+    #if defined(PARTICLES_GPU) && !defined(MPI_GPU)
+  cudaMemcpy(h_send_buffer_z1_particles, d_send_buffer_z1_particles, buffer_length * sizeof(Real),
+             cudaMemcpyDeviceToHost);
   send_buffer_z1_particles = h_send_buffer_z1_particles;
-  #endif
+    #endif
 
-  MPI_Isend(send_buffer_z1_particles, buffer_length, MPI_CHREAL, dest[5],   4, world, &send_request_particles_transfer[ireq_particles_transfer]);
-  MPI_Request_free(send_request_particles_transfer+ireq_particles_transfer);
+  MPI_Isend(send_buffer_z1_particles, buffer_length, MPI_CHREAL, dest[5], 4, world,
+            &send_request_particles_transfer[ireq_particles_transfer]);
+  MPI_Request_free(send_request_particles_transfer + ireq_particles_transfer);
 }
 
-//Unload the Transferred particles from the MPI_buffer, after buffer was received
-void Grid3D::Unload_Particles_from_Buffer_X0( int *flags ){
-  #ifdef PARTICLES_CPU
-  Particles.Unload_Particles_from_Buffer_CPU( 0, 0, h_recv_buffer_x0_particles, Particles.n_recv_x0,
-      h_send_buffer_y0_particles, h_send_buffer_y1_particles, h_send_buffer_z0_particles,
-      h_send_buffer_z1_particles, buffer_length_particles_y0_send, buffer_length_particles_y1_send,
-      buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags);
-  #endif//PARTICLES_CPU
-  #ifdef PARTICLES_GPU
-  #ifndef MPI_GPU
-  cudaMemcpy(d_recv_buffer_x0_particles, h_recv_buffer_x0_particles,
-             buffer_length_particles_x0_recv*sizeof(Real),
+// Unload the Transferred particles from the MPI_buffer, after buffer was
+// received
+void Grid3D::Unload_Particles_from_Buffer_X0(int *flags)
+{
+    #ifdef PARTICLES_CPU
+  Particles.Unload_Particles_from_Buffer_CPU(
+      0, 0, h_recv_buffer_x0_particles, Particles.n_recv_x0, h_send_buffer_y0_particles, h_send_buffer_y1_particles,
+      h_send_buffer_z0_particles, h_send_buffer_z1_particles, buffer_length_particles_y0_send,
+      buffer_length_particles_y1_send, buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags);
+    #endif  // PARTICLES_CPU
+    #ifdef PARTICLES_GPU
+      #ifndef MPI_GPU
+  cudaMemcpy(d_recv_buffer_x0_particles, h_recv_buffer_x0_particles, buffer_length_particles_x0_recv * sizeof(Real),
              cudaMemcpyHostToDevice);
-  #endif
-  Particles.Unload_Particles_from_Buffer_GPU( 0, 0, d_recv_buffer_x0_particles, Particles.n_recv_x0 );
-  #endif//PARTICLES_GPU
+      #endif
+  Particles.Unload_Particles_from_Buffer_GPU(0, 0, d_recv_buffer_x0_particles, Particles.n_recv_x0);
+    #endif  // PARTICLES_GPU
 }
 
-void Grid3D::Unload_Particles_from_Buffer_X1( int *flags  ){
-  #ifdef PARTICLES_CPU
-  Particles.Unload_Particles_from_Buffer_CPU( 0, 1, h_recv_buffer_x1_particles, Particles.n_recv_x1,
-      h_send_buffer_y0_particles, h_send_buffer_y1_particles, h_send_buffer_z0_particles,
-      h_send_buffer_z1_particles, buffer_length_particles_y0_send, buffer_length_particles_y1_send,
-      buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags);
-  #endif//PARTICLES_CPU
-  #ifdef PARTICLES_GPU
-  #ifndef MPI_GPU
-  cudaMemcpy(d_recv_buffer_x1_particles, h_recv_buffer_x1_particles,
-             buffer_length_particles_x1_recv*sizeof(Real),
+void Grid3D::Unload_Particles_from_Buffer_X1(int *flags)
+{
+    #ifdef PARTICLES_CPU
+  Particles.Unload_Particles_from_Buffer_CPU(
+      0, 1, h_recv_buffer_x1_particles, Particles.n_recv_x1, h_send_buffer_y0_particles, h_send_buffer_y1_particles,
+      h_send_buffer_z0_particles, h_send_buffer_z1_particles, buffer_length_particles_y0_send,
+      buffer_length_particles_y1_send, buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags);
+    #endif  // PARTICLES_CPU
+    #ifdef PARTICLES_GPU
+      #ifndef MPI_GPU
+  cudaMemcpy(d_recv_buffer_x1_particles, h_recv_buffer_x1_particles, buffer_length_particles_x1_recv * sizeof(Real),
              cudaMemcpyHostToDevice);
-  #endif
-  Particles.Unload_Particles_from_Buffer_GPU( 0, 1, d_recv_buffer_x1_particles, Particles.n_recv_x1 );
-  #endif//PARTICLES_GPU
+      #endif
+  Particles.Unload_Particles_from_Buffer_GPU(0, 1, d_recv_buffer_x1_particles, Particles.n_recv_x1);
+    #endif  // PARTICLES_GPU
 }
 
-void Grid3D::Unload_Particles_from_Buffer_Y0( int *flags ){
-  #ifdef PARTICLES_CPU
-  Particles.Unload_Particles_from_Buffer_CPU( 1, 0, h_recv_buffer_y0_particles, Particles.n_recv_y0,
-      h_send_buffer_y0_particles, h_send_buffer_y1_particles, h_send_buffer_z0_particles,
-      h_send_buffer_z1_particles, buffer_length_particles_y0_send , buffer_length_particles_y1_send,
-      buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags);
-  #endif//PARTICLES_CPU
-  #ifdef PARTICLES_GPU
-  #ifndef MPI_GPU
-  cudaMemcpy(d_recv_buffer_y0_particles, h_recv_buffer_y0_particles,
-             buffer_length_particles_y0_recv*sizeof(Real),
+void Grid3D::Unload_Particles_from_Buffer_Y0(int *flags)
+{
+    #ifdef PARTICLES_CPU
+  Particles.Unload_Particles_from_Buffer_CPU(
+      1, 0, h_recv_buffer_y0_particles, Particles.n_recv_y0, h_send_buffer_y0_particles, h_send_buffer_y1_particles,
+      h_send_buffer_z0_particles, h_send_buffer_z1_particles, buffer_length_particles_y0_send,
+      buffer_length_particles_y1_send, buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags);
+    #endif  // PARTICLES_CPU
+    #ifdef PARTICLES_GPU
+      #ifndef MPI_GPU
+  cudaMemcpy(d_recv_buffer_y0_particles, h_recv_buffer_y0_particles, buffer_length_particles_y0_recv * sizeof(Real),
              cudaMemcpyHostToDevice);
-  #endif
-  Particles.Unload_Particles_from_Buffer_GPU( 1, 0, d_recv_buffer_y0_particles, Particles.n_recv_y0 );
-  #endif//PARTICLES_GPU
+      #endif
+  Particles.Unload_Particles_from_Buffer_GPU(1, 0, d_recv_buffer_y0_particles, Particles.n_recv_y0);
+    #endif  // PARTICLES_GPU
 }
 
-void Grid3D::Unload_Particles_from_Buffer_Y1( int *flags  ){
-  #ifdef PARTICLES_CPU
-  Particles.Unload_Particles_from_Buffer_CPU( 1, 1, h_recv_buffer_y1_particles, Particles.n_recv_y1,
-      h_send_buffer_y0_particles, h_send_buffer_y1_particles, h_send_buffer_z0_particles,
-      h_send_buffer_z1_particles, buffer_length_particles_y0_send , buffer_length_particles_y1_send,
-      buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags);
-  #endif//PARTICLES_CPU
-  #ifdef PARTICLES_GPU
-  #ifndef MPI_GPU
-  cudaMemcpy(d_recv_buffer_y1_particles, h_recv_buffer_y1_particles,
-             buffer_length_particles_y1_recv*sizeof(Real),
+void Grid3D::Unload_Particles_from_Buffer_Y1(int *flags)
+{
+    #ifdef PARTICLES_CPU
+  Particles.Unload_Particles_from_Buffer_CPU(
+      1, 1, h_recv_buffer_y1_particles, Particles.n_recv_y1, h_send_buffer_y0_particles, h_send_buffer_y1_particles,
+      h_send_buffer_z0_particles, h_send_buffer_z1_particles, buffer_length_particles_y0_send,
+      buffer_length_particles_y1_send, buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags);
+    #endif  // PARTICLES_CPU
+    #ifdef PARTICLES_GPU
+      #ifndef MPI_GPU
+  cudaMemcpy(d_recv_buffer_y1_particles, h_recv_buffer_y1_particles, buffer_length_particles_y1_recv * sizeof(Real),
              cudaMemcpyHostToDevice);
-  #endif
-  Particles.Unload_Particles_from_Buffer_GPU( 1, 1, d_recv_buffer_y1_particles, Particles.n_recv_y1 );
-  #endif//PARTICLES_GPU
+      #endif
+  Particles.Unload_Particles_from_Buffer_GPU(1, 1, d_recv_buffer_y1_particles, Particles.n_recv_y1);
+    #endif  // PARTICLES_GPU
 }
 
-void Grid3D::Unload_Particles_from_Buffer_Z0( int *flags ){
-  #ifdef PARTICLES_CPU
-  Particles.Unload_Particles_from_Buffer_CPU( 2, 0, h_recv_buffer_z0_particles, Particles.n_recv_z0,
-      h_send_buffer_y0_particles, h_send_buffer_y1_particles, h_send_buffer_z0_particles,
-      h_send_buffer_z1_particles, buffer_length_particles_y0_send , buffer_length_particles_y1_send,
-      buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags);
-  #endif//PARTICLES_CPU
-  #ifdef PARTICLES_GPU
-  #ifndef MPI_GPU
-  cudaMemcpy(d_recv_buffer_z0_particles, h_recv_buffer_z0_particles,
-             buffer_length_particles_z0_recv*sizeof(Real),
+void Grid3D::Unload_Particles_from_Buffer_Z0(int *flags)
+{
+    #ifdef PARTICLES_CPU
+  Particles.Unload_Particles_from_Buffer_CPU(
+      2, 0, h_recv_buffer_z0_particles, Particles.n_recv_z0, h_send_buffer_y0_particles, h_send_buffer_y1_particles,
+      h_send_buffer_z0_particles, h_send_buffer_z1_particles, buffer_length_particles_y0_send,
+      buffer_length_particles_y1_send, buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags);
+    #endif  // PARTICLES_CPU
+    #ifdef PARTICLES_GPU
+      #ifndef MPI_GPU
+  cudaMemcpy(d_recv_buffer_z0_particles, h_recv_buffer_z0_particles, buffer_length_particles_z0_recv * sizeof(Real),
              cudaMemcpyHostToDevice);
-  #endif
-  Particles.Unload_Particles_from_Buffer_GPU( 2, 0, d_recv_buffer_z0_particles, Particles.n_recv_z0 );
-  #endif//PARTICLES_GPU
+      #endif
+  Particles.Unload_Particles_from_Buffer_GPU(2, 0, d_recv_buffer_z0_particles, Particles.n_recv_z0);
+    #endif  // PARTICLES_GPU
 }
 
-void Grid3D::Unload_Particles_from_Buffer_Z1( int *flags ){
-  #ifdef PARTICLES_CPU
-  Particles.Unload_Particles_from_Buffer_CPU( 2, 1, h_recv_buffer_z1_particles, Particles.n_recv_z1,
-      h_send_buffer_y0_particles, h_send_buffer_y1_particles, h_send_buffer_z0_particles,
-      h_send_buffer_z1_particles, buffer_length_particles_y0_send , buffer_length_particles_y1_send,
-      buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags);
-  #endif//PARTICLES_CPU
-  #ifdef PARTICLES_GPU
-  #ifndef MPI_GPU
-  cudaMemcpy(d_recv_buffer_z1_particles, h_recv_buffer_z1_particles,
-             buffer_length_particles_z1_recv*sizeof(Real),
+void Grid3D::Unload_Particles_from_Buffer_Z1(int *flags)
+{
+    #ifdef PARTICLES_CPU
+  Particles.Unload_Particles_from_Buffer_CPU(
+      2, 1, h_recv_buffer_z1_particles, Particles.n_recv_z1, h_send_buffer_y0_particles, h_send_buffer_y1_particles,
+      h_send_buffer_z0_particles, h_send_buffer_z1_particles, buffer_length_particles_y0_send,
+      buffer_length_particles_y1_send, buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags);
+    #endif  // PARTICLES_CPU
+    #ifdef PARTICLES_GPU
+      #ifndef MPI_GPU
+  cudaMemcpy(d_recv_buffer_z1_particles, h_recv_buffer_z1_particles, buffer_length_particles_z1_recv * sizeof(Real),
              cudaMemcpyHostToDevice);
-  #endif
-  Particles.Unload_Particles_from_Buffer_GPU( 2, 1, d_recv_buffer_z1_particles, Particles.n_recv_z1 );
-  #endif//PARTICLES_GPU
+      #endif
+  Particles.Unload_Particles_from_Buffer_GPU(2, 1, d_recv_buffer_z1_particles, Particles.n_recv_z1);
+    #endif  // PARTICLES_GPU
 }
 
-
-//Find the particles that moved outside the local domain in order to transfer them.
-void Particles_3D::Select_Particles_to_Transfer_All( int *flags ){
-
-  #ifdef PARTICLES_CPU
-  Select_Particles_to_Transfer_All_CPU( flags );
-  #endif//PARTICLES_CPU
+// Find the particles that moved outside the local domain in order to transfer
+// them.
+void Particles3D::Select_Particles_to_Transfer_All(int *flags)
+{
+    #ifdef PARTICLES_CPU
+  Select_Particles_to_Transfer_All_CPU(flags);
+    #endif  // PARTICLES_CPU
 
   // When using PARTICLES_GPU the particles that need to be Transferred
   // are selected on the Load_Buffer_GPU functions
-
 }
 
-
-void Particles_3D::Clear_Particles_For_Transfer( void ){
-
-  //Set the number of transferred particles to 0.
+void Particles3D::Clear_Particles_For_Transfer(void)
+{
+  // Set the number of transferred particles to 0.
   n_transfer_x0 = 0;
   n_transfer_x1 = 0;
   n_transfer_y0 = 0;
@@ -603,7 +668,7 @@ void Particles_3D::Clear_Particles_For_Transfer( void ){
   n_transfer_z0 = 0;
   n_transfer_z1 = 0;
 
-  //Set the number of send particles to 0.
+  // Set the number of send particles to 0.
   n_send_x0 = 0;
   n_send_x1 = 0;
   n_send_y0 = 0;
@@ -611,7 +676,7 @@ void Particles_3D::Clear_Particles_For_Transfer( void ){
   n_send_z0 = 0;
   n_send_z1 = 0;
 
-  //Set the number of received particles to 0.
+  // Set the number of received particles to 0.
   n_recv_x0 = 0;
   n_recv_x1 = 0;
   n_recv_y0 = 0;
@@ -619,7 +684,7 @@ void Particles_3D::Clear_Particles_For_Transfer( void ){
   n_recv_z0 = 0;
   n_recv_z1 = 0;
 
-  //Set the number of particles in transfer buffers to 0.
+  // Set the number of particles in transfer buffers to 0.
   n_in_buffer_x0 = 0;
   n_in_buffer_x1 = 0;
   n_in_buffer_y0 = 0;
@@ -627,264 +692,355 @@ void Particles_3D::Clear_Particles_For_Transfer( void ){
   n_in_buffer_z0 = 0;
   n_in_buffer_z1 = 0;
 
-
-  #ifdef PARTICLES_CPU
-  //Clear the particles indices that were transferred during the previous timestep
+    #ifdef PARTICLES_CPU
+  // Clear the particles indices that were transferred during the previous
+  // timestep
   Clear_Vectors_For_Transfers();
-  #endif //PARTICLES_CPU
-
+    #endif  // PARTICLES_CPU
 }
 
-#ifdef PARTICLES_GPU
-
-int Particles_3D::Select_Particles_to_Transfer_GPU( int direction, int side ){
+    #ifdef PARTICLES_GPU
 
+int Particles3D::Select_Particles_to_Transfer_GPU(int direction, int side)
+{
   int n_transfer;
   Real *pos;
   Real domainMin, domainMax;
 
-  if ( direction == 0 ){
-    pos = pos_x_dev;
+  if (direction == 0) {
+    pos       = pos_x_dev;
     domainMax = G.xMax;
     domainMin = G.xMin;
   }
-  if ( direction == 1 ){
-    pos = pos_y_dev;
+  if (direction == 1) {
+    pos       = pos_y_dev;
     domainMax = G.yMax;
     domainMin = G.yMin;
   }
-  if ( direction == 2 ){
-    pos = pos_z_dev;
+  if (direction == 2) {
+    pos       = pos_z_dev;
     domainMax = G.zMax;
     domainMin = G.zMin;
   }
-
-  //Set the number of particles that will be sent and load the particles data into the transfer buffers
-  n_transfer = Select_Particles_to_Transfer_GPU_function(  n_local, side, domainMin, domainMax, pos, G.n_transfer_d, G.n_transfer_h, G.transfer_particles_flags_d, G.transfer_particles_indices_d, G.replace_particles_indices_d, G.transfer_particles_prefix_sum_d, G.transfer_particles_prefix_sum_blocks_d  );
-  CHECK(cudaDeviceSynchronize());
+  // chprintf("n_local=%d SELECT PARTICLES: %d dir, %d side. Max/Min %.4e/%.4e
+  // \n", n_local, direction, side, domainMax, domainMin); Set the number of
+  // particles that will be sent and load the particles data into the transfer
+  // buffers
+  n_transfer = Select_Particles_to_Transfer_GPU_function(
+      n_local, side, domainMin, domainMax, pos, G.n_transfer_d, G.n_transfer_h, G.transfer_particles_flags_d,
+      G.transfer_particles_indices_d, G.replace_particles_indices_d, G.transfer_particles_prefix_sum_d,
+      G.transfer_particles_prefix_sum_blocks_d);
+  GPU_Error_Check(cudaDeviceSynchronize());
 
   return n_transfer;
 }
 
-void Particles_3D::Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, int direction, int side, Real *send_buffer_h, int buffer_length  ){
-
+void Particles3D::Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, int direction, int side, Real *send_buffer_h,
+                                                        int buffer_length)
+{
   part_int_t *n_send;
   int *buffer_size;
   int n_fields_to_transfer;
   Real *pos, *send_buffer_d;
   Real domainMin, domainMax;
   int bt_pos_x, bt_pos_y, bt_pos_z, bt_non_pos;
+  int field_id = -1;
 
-  bt_pos_x = -1;
-  bt_pos_y = -1;
-  bt_pos_z = -1;
+  bt_pos_x   = -1;
+  bt_pos_y   = -1;
+  bt_pos_z   = -1;
   bt_non_pos = -1;
 
-  if ( direction == 0 ){
-    pos = pos_x_dev;
+  if (direction == 0) {
+    pos       = pos_x_dev;
     domainMin = G.domainMin_x;
     domainMax = G.domainMax_x;
-    if ( side == 0 ){
-      n_send = &n_send_x0;
-      buffer_size = &G.send_buffer_size_x0;
+    if (side == 0) {
+      n_send        = &n_send_x0;
+      buffer_size   = &G.send_buffer_size_x0;
       send_buffer_d = G.send_buffer_x0_d;
-      bt_pos_x = G.boundary_type_x0;
+      bt_pos_x      = G.boundary_type_x0;
     }
-    if ( side == 1 ){
-      n_send = &n_send_x1;
-      buffer_size = &G.send_buffer_size_x1;
+    if (side == 1) {
+      n_send        = &n_send_x1;
+      buffer_size   = &G.send_buffer_size_x1;
       send_buffer_d = G.send_buffer_x1_d;
-      bt_pos_x = G.boundary_type_x1;
+      bt_pos_x      = G.boundary_type_x1;
     }
   }
-  if ( direction == 1 ){
-    pos = pos_y_dev;
+  if (direction == 1) {
+    pos       = pos_y_dev;
     domainMin = G.domainMin_y;
     domainMax = G.domainMax_y;
-    if ( side == 0 ){
-      n_send = &n_send_y0;
-      buffer_size = &G.send_buffer_size_y0;
+    if (side == 0) {
+      n_send        = &n_send_y0;
+      buffer_size   = &G.send_buffer_size_y0;
       send_buffer_d = G.send_buffer_y0_d;
-      bt_pos_y = G.boundary_type_y0;
+      bt_pos_y      = G.boundary_type_y0;
     }
-    if ( side == 1 ){
-      n_send = &n_send_y1;
-      buffer_size = &G.send_buffer_size_y1;
+    if (side == 1) {
+      n_send        = &n_send_y1;
+      buffer_size   = &G.send_buffer_size_y1;
       send_buffer_d = G.send_buffer_y1_d;
-      bt_pos_y = G.boundary_type_y1;
+      bt_pos_y      = G.boundary_type_y1;
     }
   }
-  if ( direction == 2 ){
-    pos = pos_z_dev;
+  if (direction == 2) {
+    pos       = pos_z_dev;
     domainMin = G.domainMin_z;
     domainMax = G.domainMax_z;
-    if ( side == 0 ){
-      n_send = &n_send_z0;
-      buffer_size = &G.send_buffer_size_z0;
+    if (side == 0) {
+      n_send        = &n_send_z0;
+      buffer_size   = &G.send_buffer_size_z0;
       send_buffer_d = G.send_buffer_z0_d;
-      bt_pos_z = G.boundary_type_z0;
+      bt_pos_z      = G.boundary_type_z0;
     }
-    if ( side == 1 ){
-      n_send = &n_send_z1;
-      buffer_size = &G.send_buffer_size_z1;
+    if (side == 1) {
+      n_send        = &n_send_z1;
+      buffer_size   = &G.send_buffer_size_z1;
       send_buffer_d = G.send_buffer_z1_d;
-      bt_pos_z = G.boundary_type_z1;
+      bt_pos_z      = G.boundary_type_z1;
     }
   }
 
-
-
-  // If the number of particles in the array exceeds the size of the array, extend the array
-  if ( (*n_send + n_transfer)*N_DATA_PER_PARTICLE_TRANSFER > *buffer_size  ){
-    printf( "Extending Particles Transfer Buffer  ");
-    Extend_GPU_Array_Real( &send_buffer_d, *buffer_size,  G.gpu_allocation_factor*(*n_send + n_transfer)*N_DATA_PER_PARTICLE_TRANSFER, true  );
-    *buffer_size = (part_int_t) G.gpu_allocation_factor*(*n_send + n_transfer)*N_DATA_PER_PARTICLE_TRANSFER;
+  // If the number of particles in the array exceeds the size of the array,
+  // extend the array
+  if ((*n_send + n_transfer) * N_DATA_PER_PARTICLE_TRANSFER > *buffer_size) {
+    printf("Extending Particles Transfer Buffer  ");
+    Extend_GPU_Array(&send_buffer_d, *buffer_size,
+                     G.gpu_allocation_factor * (*n_send + n_transfer) * N_DATA_PER_PARTICLE_TRANSFER, true);
+    *buffer_size = (part_int_t)G.gpu_allocation_factor * (*n_send + n_transfer) * N_DATA_PER_PARTICLE_TRANSFER;
   }
 
   // Load the particles that will be transferred into the buffers
   n_fields_to_transfer = N_DATA_PER_PARTICLE_TRANSFER;
-  Load_Particles_to_Transfer_GPU_function( n_transfer, 0, n_fields_to_transfer, pos_x_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_pos_x );
-  Load_Particles_to_Transfer_GPU_function( n_transfer, 1, n_fields_to_transfer, pos_y_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_pos_y );
-  Load_Particles_to_Transfer_GPU_function( n_transfer, 2, n_fields_to_transfer, pos_z_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_pos_z );
-  Load_Particles_to_Transfer_GPU_function( n_transfer, 3, n_fields_to_transfer, vel_x_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_non_pos );
-  Load_Particles_to_Transfer_GPU_function( n_transfer, 4, n_fields_to_transfer, vel_y_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_non_pos );
-  Load_Particles_to_Transfer_GPU_function( n_transfer, 5, n_fields_to_transfer, vel_z_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_non_pos );
-
-  CHECK(cudaDeviceSynchronize());
+  Load_Particles_to_Transfer_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, pos_x_dev,
+                                          G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax,
+                                          bt_pos_x);
+  Load_Particles_to_Transfer_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, pos_y_dev,
+                                          G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax,
+                                          bt_pos_y);
+  Load_Particles_to_Transfer_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, pos_z_dev,
+                                          G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax,
+                                          bt_pos_z);
+  Load_Particles_to_Transfer_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, vel_x_dev,
+                                          G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax,
+                                          bt_non_pos);
+  Load_Particles_to_Transfer_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, vel_y_dev,
+                                          G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax,
+                                          bt_non_pos);
+  Load_Particles_to_Transfer_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, vel_z_dev,
+                                          G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax,
+                                          bt_non_pos);
+      #ifndef SINGLE_PARTICLE_MASS
+  Load_Particles_to_Transfer_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, mass_dev,
+                                          G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax,
+                                          bt_non_pos);
+      #endif
+      #ifdef PARTICLE_IDS
+  Load_Particles_to_Transfer_Int_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, partIDs_dev,
+                                              G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax,
+                                              bt_non_pos);
+      #endif
+      #ifdef PARTICLE_AGE
+  Load_Particles_to_Transfer_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, age_dev,
+                                          G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax,
+                                          bt_non_pos);
+      #endif
+  GPU_Error_Check(cudaDeviceSynchronize());
 
   *n_send += n_transfer;
   // if ( *n_send > 0 ) printf( "###Transfered %ld  particles\n", *n_send);
-
-
 }
 
-
-void Particles_3D::Replace_Tranfered_Particles_GPU( int n_transfer ){
-
+void Particles3D::Replace_Tranfered_Particles_GPU(int n_transfer)
+{
   // Replace the particles that were transferred
-  Replace_Transfered_Particles_GPU_function( n_transfer, pos_x_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false );
-  Replace_Transfered_Particles_GPU_function( n_transfer, pos_y_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false );
-  Replace_Transfered_Particles_GPU_function( n_transfer, pos_z_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false );
-  Replace_Transfered_Particles_GPU_function( n_transfer, vel_x_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false );
-  Replace_Transfered_Particles_GPU_function( n_transfer, vel_y_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false );
-  Replace_Transfered_Particles_GPU_function( n_transfer, vel_z_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false );
-
-  CHECK(cudaDeviceSynchronize());
+  Replace_Transfered_Particles_GPU_function(n_transfer, pos_x_dev, G.transfer_particles_indices_d,
+                                            G.replace_particles_indices_d, false);
+  Replace_Transfered_Particles_GPU_function(n_transfer, pos_y_dev, G.transfer_particles_indices_d,
+                                            G.replace_particles_indices_d, false);
+  Replace_Transfered_Particles_GPU_function(n_transfer, pos_z_dev, G.transfer_particles_indices_d,
+                                            G.replace_particles_indices_d, false);
+  Replace_Transfered_Particles_GPU_function(n_transfer, vel_x_dev, G.transfer_particles_indices_d,
+                                            G.replace_particles_indices_d, false);
+  Replace_Transfered_Particles_GPU_function(n_transfer, vel_y_dev, G.transfer_particles_indices_d,
+                                            G.replace_particles_indices_d, false);
+  Replace_Transfered_Particles_GPU_function(n_transfer, vel_z_dev, G.transfer_particles_indices_d,
+                                            G.replace_particles_indices_d, false);
+      #ifndef SINGLE_PARTICLE_MASS
+  Replace_Transfered_Particles_GPU_function(n_transfer, mass_dev, G.transfer_particles_indices_d,
+                                            G.replace_particles_indices_d, false);
+      #endif
+      #ifdef PARTICLE_IDS
+  Replace_Transfered_Particles_Int_GPU_function(n_transfer, partIDs_dev, G.transfer_particles_indices_d,
+                                                G.replace_particles_indices_d, false);
+      #endif
+      #ifdef PARTICLE_AGE
+  Replace_Transfered_Particles_GPU_function(n_transfer, age_dev, G.transfer_particles_indices_d,
+                                            G.replace_particles_indices_d, false);
+      #endif
+
+  GPU_Error_Check(cudaDeviceSynchronize());
   // Update the local number of particles
   n_local -= n_transfer;
-
 }
 
-
-void Particles_3D::Load_Particles_to_Buffer_GPU( int direction, int side, Real *send_buffer_h, int buffer_length ){
-
+void Particles3D::Load_Particles_to_Buffer_GPU(int direction, int side, Real *send_buffer_h, int buffer_length)
+{
   int n_transfer;
+  n_transfer = Select_Particles_to_Transfer_GPU(direction, side);
 
-  n_transfer = Select_Particles_to_Transfer_GPU( direction, side );
-
-  Copy_Transfer_Particles_to_Buffer_GPU( n_transfer, direction, side, send_buffer_h, buffer_length );
-
-  Replace_Tranfered_Particles_GPU( n_transfer );
+  Copy_Transfer_Particles_to_Buffer_GPU(n_transfer, direction, side, send_buffer_h, buffer_length);
 
+  Replace_Tranfered_Particles_GPU(n_transfer);
 }
 
+/**
+ * Open boundary conditions follows the same logic as
+ * Load_Particles_to_Buffer_GPU, except that the particles that are selected for
+ * transfer are not moved into any buffer (Copy_Transfer_Particles_to_Buffer_GPU
+ * step is skipped).  Also the domainMix/domainMax are the global min/max
+ * values.
+ */
+void Particles3D::Set_Particles_Open_Boundary_GPU(int dir, int side)
+{
+  int n_transfer;
+  /*Real *pos;
+  Real domainMin, domainMax;
 
-void Particles_3D::Copy_Transfer_Particles_from_Buffer_GPU(int n_recv, Real *recv_buffer_d ){
+  if ( dir == 0 ){
+    domainMin = G.domainMin_x;
+    domainMax = G.domainMax_x;
+  }
+  if ( dir == 1 ){
+    domainMin = G.domainMin_y;
+    domainMax = G.domainMax_y;
+  }
+  if ( dir == 2 ){
+    domainMin = G.domainMin_z;
+    domainMax = G.domainMax_z;
+  }*/
+  n_transfer = Select_Particles_to_Transfer_GPU(dir, side);
+  // n_transfer = Select_Particles_to_Transfer_GPU_function(  n_local, side,
+  // domainMin, domainMax, pos, G.n_transfer_d, G.n_transfer_h,
+  // G.transfer_particles_flags_d, G.transfer_particles_indices_d,
+  // G.replace_particles_indices_d, G.transfer_particles_prefix_sum_d,
+  // G.transfer_particles_prefix_sum_blocks_d  );
+  // GPU_Error_Check(cudaDeviceSynchronize());
+  // chprintf("OPEN condition: removing %d\n", n_transfer);
+  Replace_Tranfered_Particles_GPU(n_transfer);
+}
 
+void Particles3D::Copy_Transfer_Particles_from_Buffer_GPU(int n_recv, Real *recv_buffer_d)
+{
   int n_fields_to_transfer;
 
   part_int_t n_local_after = n_local + n_recv;
-  if ( n_local_after > particles_array_size ){
-    printf(" Reallocating GPU particles arrays. N local particles: %ld \n", n_local_after );
+  if (n_local_after > particles_array_size) {
+    printf(" Reallocating GPU particles arrays. N local particles: %ld \n", n_local_after);
     int new_size = G.gpu_allocation_factor * n_local_after;
-    Extend_GPU_Array_Real( &pos_x_dev,  (int) particles_array_size, new_size, true  );
-    Extend_GPU_Array_Real( &pos_y_dev,  (int) particles_array_size, new_size, false );
-    Extend_GPU_Array_Real( &pos_z_dev,  (int) particles_array_size, new_size, false );
-    Extend_GPU_Array_Real( &vel_x_dev,  (int) particles_array_size, new_size, false );
-    Extend_GPU_Array_Real( &vel_y_dev,  (int) particles_array_size, new_size, false );
-    Extend_GPU_Array_Real( &vel_z_dev,  (int) particles_array_size, new_size, false );
-    Extend_GPU_Array_Real( &grav_x_dev, (int) particles_array_size, new_size, false );
-    Extend_GPU_Array_Real( &grav_y_dev, (int) particles_array_size, new_size, false );
-    Extend_GPU_Array_Real( &grav_z_dev, (int) particles_array_size, new_size, false );
-    particles_array_size = (part_int_t) new_size;
+    Extend_GPU_Array(&pos_x_dev, (int)particles_array_size, new_size, true);
+    Extend_GPU_Array(&pos_y_dev, (int)particles_array_size, new_size, false);
+    Extend_GPU_Array(&pos_z_dev, (int)particles_array_size, new_size, false);
+    Extend_GPU_Array(&vel_x_dev, (int)particles_array_size, new_size, false);
+    Extend_GPU_Array(&vel_y_dev, (int)particles_array_size, new_size, false);
+    Extend_GPU_Array(&vel_z_dev, (int)particles_array_size, new_size, false);
+    Extend_GPU_Array(&grav_x_dev, (int)particles_array_size, new_size, false);
+    Extend_GPU_Array(&grav_y_dev, (int)particles_array_size, new_size, false);
+    Extend_GPU_Array(&grav_z_dev, (int)particles_array_size, new_size, false);
+      #ifndef SINGLE_PARTICLE_MASS
+    Extend_GPU_Array(&mass_dev, (int)particles_array_size, new_size, false);
+      #endif
+      #ifdef PARTICLE_IDS
+    Extend_GPU_Array(&partIDs_dev, (int)particles_array_size, new_size, false);
+      #endif
+      #ifdef PARTICLE_AGE
+    Extend_GPU_Array(&age_dev, (int)particles_array_size, new_size, false);
+      #endif
+    particles_array_size = (part_int_t)new_size;
     ReAllocate_Memory_GPU_MPI();
   }
 
   // Unload the particles that were transferred from the buffers
+  int field_id         = -1;
   n_fields_to_transfer = N_DATA_PER_PARTICLE_TRANSFER;
-  Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, 0, n_fields_to_transfer, pos_x_dev, recv_buffer_d  );
-  Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, 1, n_fields_to_transfer, pos_y_dev, recv_buffer_d  );
-  Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, 2, n_fields_to_transfer, pos_z_dev, recv_buffer_d  );
-  Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, 3, n_fields_to_transfer, vel_x_dev, recv_buffer_d  );
-  Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, 4, n_fields_to_transfer, vel_y_dev, recv_buffer_d  );
-  Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, 5, n_fields_to_transfer, vel_z_dev, recv_buffer_d  );
-  //
+  Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, pos_x_dev,
+                                            recv_buffer_d);
+  Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, pos_y_dev,
+                                            recv_buffer_d);
+  Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, pos_z_dev,
+                                            recv_buffer_d);
+  Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, vel_x_dev,
+                                            recv_buffer_d);
+  Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, vel_y_dev,
+                                            recv_buffer_d);
+  Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, vel_z_dev,
+                                            recv_buffer_d);
+      #ifndef SINGLE_PARTICLE_MASS
+  Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, mass_dev, recv_buffer_d);
+      #endif
+      #ifdef PARTICLE_IDS
+  Unload_Particles_Int_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, partIDs_dev,
+                                                recv_buffer_d);
+      #endif
+      #ifdef PARTICLE_AGE
+  Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, age_dev, recv_buffer_d);
+      #endif
+
   n_local += n_recv;
   // if ( n_recv > 0 ) printf( "###Unloaded %d  particles\n", n_recv );
-
-
 }
 
-
-
-void Particles_3D::Unload_Particles_from_Buffer_GPU( int direction, int side , Real *recv_buffer_h, int n_recv ){
-
+void Particles3D::Unload_Particles_from_Buffer_GPU(int direction, int side, Real *recv_buffer_h, int n_recv)
+{
   int buffer_size;
   Real domainMin, domainMax;
   Real *recv_buffer_d;
 
-  if ( direction == 0 ){
+  if (direction == 0) {
     domainMin = G.domainMin_x;
     domainMin = G.domainMax_x;
-    if ( side == 0 ){
+    if (side == 0) {
       buffer_size   = G.recv_buffer_size_x0;
       recv_buffer_d = G.recv_buffer_x0_d;
     }
-    if ( side == 1 ){
+    if (side == 1) {
       buffer_size   = G.recv_buffer_size_x1;
       recv_buffer_d = G.recv_buffer_x1_d;
     }
   }
-  if ( direction == 1 ){
+  if (direction == 1) {
     domainMin = G.domainMin_y;
     domainMin = G.domainMax_y;
-    if ( side == 0 ){
+    if (side == 0) {
       buffer_size   = G.recv_buffer_size_y0;
       recv_buffer_d = G.recv_buffer_y0_d;
     }
-    if ( side == 1 ){
+    if (side == 1) {
       buffer_size   = G.recv_buffer_size_y1;
       recv_buffer_d = G.recv_buffer_y1_d;
     }
   }
-  if ( direction == 2 ){
+  if (direction == 2) {
     domainMin = G.domainMin_z;
     domainMin = G.domainMax_z;
-    if ( side == 0 ){
+    if (side == 0) {
       buffer_size   = G.recv_buffer_size_z0;
       recv_buffer_d = G.recv_buffer_z0_d;
     }
-    if ( side == 1 ){
+    if (side == 1) {
       buffer_size   = G.recv_buffer_size_z1;
       recv_buffer_d = G.recv_buffer_z1_d;
     }
   }
 
-  CudaCheckError();
-
-  Copy_Transfer_Particles_from_Buffer_GPU( n_recv, recv_buffer_d );
+  GPU_Error_Check();
 
+  Copy_Transfer_Particles_from_Buffer_GPU(n_recv, recv_buffer_d);
 }
 
+    #endif  // PARTICLES_GPU
 
-
-
-#endif //PARTICLES_GPU
-
-
-
-
-
-
-#endif //MPI_CHOLLA
-#endif //PARTICLES
+  #endif  // MPI_CHOLLA
+#endif    // PARTICLES
diff --git a/src/particles/particles_boundaries_cpu.cpp b/src/particles/particles_boundaries_cpu.cpp
index b90963b05..27470befe 100644
--- a/src/particles/particles_boundaries_cpu.cpp
+++ b/src/particles/particles_boundaries_cpu.cpp
@@ -1,67 +1,75 @@
 #if defined(PARTICLES) && defined(PARTICLES_CPU)
 
-#include <unistd.h>
-#include <algorithm>
-#include <iostream>
-#include "../grid/grid3D.h"
-#include "../io/io.h"
-#include "../particles/particles_3D.h"
+  #include <unistd.h>
 
-#ifdef MPI_CHOLLA
-#include "../mpi/mpi_routines.h"
-#endif
+  #include <algorithm>
+  #include <iostream>
 
+  #include "../grid/grid3D.h"
+  #include "../io/io.h"
+  #include "particles_3D.h"
 
-//Get and remove Real value at index on vector
-Real Get_and_Remove_Real( part_int_t indx, real_vector_t &vec ){
+  #ifdef MPI_CHOLLA
+    #include "../mpi/mpi_routines.h"
+  #endif
+
+// Get and remove Real value at index on vector
+Real Get_and_Remove_Real(part_int_t indx, real_vector_t &vec)
+{
   Real value = vec[indx];
-  vec[indx] = vec.back(); //The item at the specified index is replaced by the last item in the vector
-  vec.pop_back(); //The last item in the vector is discarded
+  vec[indx]  = vec.back();  // The item at the specified index is replaced by the
+                            // last item in the vector
+  vec.pop_back();           // The last item in the vector is discarded
   return value;
 }
 
-//Remove Real value at index on vector
-void Remove_Real( part_int_t indx, real_vector_t &vec ){
-  vec[indx] = vec.back(); //The item at the specified index is replaced by the last item in the vector
-  vec.pop_back(); //The last item in the vector is discarded
+// Remove Real value at index on vector
+void Remove_Real(part_int_t indx, real_vector_t &vec)
+{
+  vec[indx] = vec.back();  // The item at the specified index is replaced by the
+                           // last item in the vector
+  vec.pop_back();          // The last item in the vector is discarded
 }
 
-//Get and remove integer value at index on vector
-Real Get_and_Remove_partID( part_int_t indx, int_vector_t &vec ){
-  Real value = (Real) vec[indx];
-  vec[indx] = vec.back();
+// Get and remove integer value at index on vector
+Real Get_and_Remove_partID(part_int_t indx, int_vector_t &vec)
+{
+  Real value = (Real)vec[indx];
+  vec[indx]  = vec.back();
   vec.pop_back();
   return value;
 }
 
-//Remove integer value at index on vector
-void Remove_ID( part_int_t indx, int_vector_t &vec ){
+// Remove integer value at index on vector
+void Remove_ID(part_int_t indx, int_vector_t &vec)
+{
   vec[indx] = vec.back();
   vec.pop_back();
 }
 
-//Convert Real to Integer for transfering particles IDs on Real buffer arrays
-part_int_t Real_to_part_int( Real inVal ){
-  part_int_t outVal = (part_int_t) inVal;
-  if ( (inVal - outVal) > 0.1 ) outVal += 1;
-  if ( fabs(outVal - inVal) > 0.5 ) outVal -= 1;
+// Convert Real to Integer for transfering particles IDs on Real buffer arrays
+part_int_t Real_to_part_int(Real inVal)
+{
+  part_int_t outVal = (part_int_t)inVal;
+  if ((inVal - outVal) > 0.1) outVal += 1;
+  if (fabs(outVal - inVal) > 0.5) outVal -= 1;
   return outVal;
 }
 
-//Set periodic boundaries for particles. Only when not using MPI
-void Grid3D::Set_Particles_Boundary( int dir, int side ){
-
+// Set periodic boundaries for particles. Only when not using MPI
+void Grid3D::Set_Particles_Boundary(int dir, int side)
+{
   Real d_min, d_max, L;
 
-  if ( dir == 0 ){
+  if (dir == 0) {
     d_min = Particles.G.xMin;
     d_max = Particles.G.xMax;
   }
-  if ( dir == 1 ){
+  if (dir == 1) {
     d_min = Particles.G.yMin;
     d_max = Particles.G.yMax;
   }
-  if ( dir == 2 ){
+  if (dir == 2) {
     d_min = Particles.G.zMin;
     d_max = Particles.G.zMax;
   }
@@ -71,250 +79,252 @@ void Grid3D::Set_Particles_Boundary( int dir, int side ){
   bool changed_pos;
   Real pos;
   #ifdef PARALLEL_OMP
-  #pragma omp parallel for private( pos, changed_pos) num_threads( N_OMP_THREADS )
+    #pragma omp parallel for private(pos, changed_pos) num_threads(N_OMP_THREADS)
   #endif
-  for( int i=0; i<Particles.n_local; i++){
-
-    if ( dir == 0 ) pos = Particles.pos_x[i];
-    if ( dir == 1 ) pos = Particles.pos_y[i];
-    if ( dir == 2 ) pos = Particles.pos_z[i];
+  for (int i = 0; i < Particles.n_local; i++) {
+    if (dir == 0) pos = Particles.pos_x[i];
+    if (dir == 1) pos = Particles.pos_y[i];
+    if (dir == 2) pos = Particles.pos_z[i];
 
     changed_pos = false;
-    if ( side == 0 ){
-      if ( pos < d_min ) pos += L; //When the position is on the left of the domain boundary, add the domain Length to the position
+    if (side == 0) {
+      if (pos < d_min)
+        pos += L;  // When the position is on the left of the domain boundary,
+                   // add the domain Length to the position
       changed_pos = true;
     }
-    if ( side == 1 ){
-      if ( pos >= d_max ) pos -= L;//When the position is on the right of the domain boundary, substract the domain Length to the position
+    if (side == 1) {
+      if (pos >= d_max)
+        pos -= L;  // When the position is on the right of the domain boundary,
+                   // substract the domain Length to the position
       changed_pos = true;
     }
 
-    //If the position was changed write the new position to the vectors
-    if ( !changed_pos ) continue;
-    if ( dir == 0 ) Particles.pos_x[i] = pos;
-    if ( dir == 1 ) Particles.pos_y[i] = pos;
-    if ( dir == 2 ) Particles.pos_z[i] = pos;
-
+    // If the position was changed write the new position to the vectors
+    if (!changed_pos) continue;
+    if (dir == 0) Particles.pos_x[i] = pos;
+    if (dir == 1) Particles.pos_y[i] = pos;
+    if (dir == 2) Particles.pos_z[i] = pos;
   }
 }
 
+// Set open boundaries for particles when not using MPI
+void Grid3D::Set_Particles_Open_Boundary_CPU(int dir, int side)
+{
+  Real d_min, d_max;
 
-//Set open boundaries for particles when not using MPI
-void Grid3D::Set_Particles_Open_Boundary( int dir, int side ){
-  Real d_min, d_max, L;
-
-  if ( dir == 0 ){
-    d_min = Particles.G.xMin;
-    d_max = Particles.G.xMax;
+  if (dir == 0) {
+    d_min = Particles.G.domainMin_x;
+    d_max = Particles.G.domainMax_x;
   }
-  if ( dir == 1 ){
-    d_min = Particles.G.yMin;
-    d_max = Particles.G.yMax;
+  if (dir == 1) {
+    d_min = Particles.G.domainMin_y;
+    d_max = Particles.G.domainMax_y;
   }
-  if ( dir == 2 ){
-    d_min = Particles.G.zMin;
-    d_max = Particles.G.zMax;
+  if (dir == 2) {
+    d_min = Particles.G.domainMin_z;
+    d_max = Particles.G.domainMax_z;
   }
 
-  L = d_max - d_min;
-
   Real pos;
   int_vector_t removed_indices;
 
   #ifdef PARALLEL_OMP
-  #pragma omp parallel for private(pos) num_threads( N_OMP_THREADS )
+    #pragma omp parallel for private(pos) num_threads(N_OMP_THREADS)
   #endif
-  for( int i=0; i<Particles.n_local; i++){
+  for (int i = 0; i < Particles.n_local; i++) {
+    if (dir == 0) pos = Particles.pos_x[i];
+    if (dir == 1) pos = Particles.pos_y[i];
+    if (dir == 2) pos = Particles.pos_z[i];
 
-    if ( dir == 0 ) pos = Particles.pos_x[i];
-    if ( dir == 1 ) pos = Particles.pos_y[i];
-    if ( dir == 2 ) pos = Particles.pos_z[i];
-
-    //If the position is out of the region, remove.
-    if (( side == 0 && pos < d_min ) || ( side == 1 && pos > d_max)) removed_indices.push_back(i);
+    // If the position is out of the region, remove.
+    if ((side == 0 && pos < d_min) || (side == 1 && pos > d_max)) removed_indices.push_back(i);
   }
   std::sort(removed_indices.begin(), removed_indices.end());
 
   part_int_t indx, pIndx;
   part_int_t n_delete = removed_indices.size();
-  for ( indx=0; indx<n_delete; indx++ ){
-    //From right to left get the index of the particle that will be deleted
+  for (indx = 0; indx < n_delete; indx++) {
+    // From right to left get the index of the particle that will be deleted
     pIndx = removed_indices.back();
-    //Remove the particle data at the selected index
-    Remove_Real( pIndx, Particles.pos_x );
-    Remove_Real( pIndx, Particles.pos_y );
-    Remove_Real( pIndx, Particles.pos_z );
-    Remove_Real( pIndx, Particles.vel_x );
-    Remove_Real( pIndx, Particles.vel_y );
-    Remove_Real( pIndx, Particles.vel_z );
-    Remove_Real( pIndx, Particles.grav_x );
-    Remove_Real( pIndx, Particles.grav_y );
-    Remove_Real( pIndx, Particles.grav_z );
-    #ifdef PARTICLE_IDS
-    Remove_ID( pIndx, Particles.partIDs );
-    #endif
-    #ifndef SINGLE_PARTICLE_MASS
-    Remove_Real( pIndx, Particles.mass );
-    #endif
-    #ifdef PARTICLE_AGE
+    // Remove the particle data at the selected index
+    Remove_Real(pIndx, Particles.pos_x);
+    Remove_Real(pIndx, Particles.pos_y);
+    Remove_Real(pIndx, Particles.pos_z);
+    Remove_Real(pIndx, Particles.vel_x);
+    Remove_Real(pIndx, Particles.vel_y);
+    Remove_Real(pIndx, Particles.vel_z);
+    Remove_Real(pIndx, Particles.grav_x);
+    Remove_Real(pIndx, Particles.grav_y);
+    Remove_Real(pIndx, Particles.grav_z);
+  #ifdef PARTICLE_IDS
+    Remove_ID(pIndx, Particles.partIDs);
+  #endif
+  #ifndef SINGLE_PARTICLE_MASS
+    Remove_Real(pIndx, Particles.mass);
+  #endif
+  #ifdef PARTICLE_AGE
     Remove_Real(pIndx, Particles.age);
-    #endif
+  #endif
     Particles.n_local -= 1;
-
   }
 }
 
+  #ifdef MPI_CHOLLA
 
-#ifdef MPI_CHOLLA
-
-
-//Find the particles that moved outside the local domain in order to transfer them.
-//The indices of selected particles are added to the out_indx_vectors
-void Particles_3D::Select_Particles_to_Transfer_All_CPU( int *flags ){
-
+// Find the particles that moved outside the local domain in order to transfer
+// them. The indices of selected particles are added to the out_indx_vectors
+void Particles3D::Select_Particles_to_Transfer_All_CPU(int *flags)
+{
   part_int_t pIndx;
-  for ( pIndx=0; pIndx<n_local; pIndx++ ){
-
-    if ( pos_x[pIndx] < G.xMin && flags[0]==5 ){
-      out_indxs_vec_x0.push_back( pIndx );
+  for (pIndx = 0; pIndx < n_local; pIndx++) {
+    if (pos_x[pIndx] < G.xMin && flags[0] == 5) {
+      out_indxs_vec_x0.push_back(pIndx);
       continue;
     }
-    if ( pos_x[pIndx] >= G.xMax && flags[1]==5 ){
-      out_indxs_vec_x1.push_back( pIndx );
+    if (pos_x[pIndx] >= G.xMax && flags[1] == 5) {
+      out_indxs_vec_x1.push_back(pIndx);
       continue;
     }
-    if ( pos_y[pIndx] < G.yMin && flags[2]==5 ){
-      out_indxs_vec_y0.push_back( pIndx );
+    if (pos_y[pIndx] < G.yMin && flags[2] == 5) {
+      out_indxs_vec_y0.push_back(pIndx);
       continue;
     }
-    if ( pos_y[pIndx] >= G.yMax && flags[3]==5 ){
-      out_indxs_vec_y1.push_back( pIndx );
+    if (pos_y[pIndx] >= G.yMax && flags[3] == 5) {
+      out_indxs_vec_y1.push_back(pIndx);
       continue;
     }
-    if ( pos_z[pIndx] < G.zMin && flags[4]==5 ){
-        out_indxs_vec_z0.push_back( pIndx );
+    if (pos_z[pIndx] < G.zMin && flags[4] == 5) {
+      out_indxs_vec_z0.push_back(pIndx);
       continue;
     }
-    if ( pos_z[pIndx] >= G.zMax && flags[5]==5 ){
-        out_indxs_vec_z1.push_back( pIndx );
+    if (pos_z[pIndx] >= G.zMax && flags[5] == 5) {
+      out_indxs_vec_z1.push_back(pIndx);
       continue;
     }
   }
 
-  //Sort the transfer Indices (NOT NEEDED: All indices are sorted at the end of the transfer before removing transferred particles )
-  // std::sort(out_indxs_vec_x0.begin(), out_indxs_vec_x0.end());
-  // std::sort(out_indxs_vec_x1.begin(), out_indxs_vec_x1.end());
-  // std::sort(out_indxs_vec_y0.begin(), out_indxs_vec_y0.end());
-  // std::sort(out_indxs_vec_y1.begin(), out_indxs_vec_y1.end());
-  // std::sort(out_indxs_vec_z0.begin(), out_indxs_vec_z0.end());
-  // std::sort(out_indxs_vec_z1.begin(), out_indxs_vec_z1.end());
-
-  //Add the size of the out_vectors to the number of particles that will be send in each direction
+  // Sort the transfer Indices (NOT NEEDED: All indices are sorted at the end of
+  // the transfer before removing transferred particles )
+  //  std::sort(out_indxs_vec_x0.begin(), out_indxs_vec_x0.end());
+  //  std::sort(out_indxs_vec_x1.begin(), out_indxs_vec_x1.end());
+  //  std::sort(out_indxs_vec_y0.begin(), out_indxs_vec_y0.end());
+  //  std::sort(out_indxs_vec_y1.begin(), out_indxs_vec_y1.end());
+  //  std::sort(out_indxs_vec_z0.begin(), out_indxs_vec_z0.end());
+  //  std::sort(out_indxs_vec_z1.begin(), out_indxs_vec_z1.end());
+
+  // Add the size of the out_vectors to the number of particles that will be
+  // send in each direction
   n_send_x0 += out_indxs_vec_x0.size();
   n_send_x1 += out_indxs_vec_x1.size();
   n_send_y0 += out_indxs_vec_y0.size();
   n_send_y1 += out_indxs_vec_y1.size();
   n_send_z0 += out_indxs_vec_z0.size();
   n_send_z1 += out_indxs_vec_z1.size();
-
 }
 
-
-//Load the particles that need to be transferred to the MPI buffer
-void Particles_3D::Load_Particles_to_Buffer_CPU( int direction, int side, Real *send_buffer, int buffer_length  ){
-
+// Load the particles that need to be transferred to the MPI buffer
+void Particles3D::Load_Particles_to_Buffer_CPU(int direction, int side, Real *send_buffer, int buffer_length)
+{
   part_int_t n_out;
   part_int_t n_send;
   int_vector_t *out_indxs_vec;
   part_int_t *n_in_buffer;
 
-  //Depending on the direction and side select the vector with the particle indices for the transfer
-  if ( direction == 0 ){
-    if ( side == 0 ){
+  // Depending on the direction and side select the vector with the particle
+  // indices for the transfer
+  if (direction == 0) {
+    if (side == 0) {
       out_indxs_vec = &out_indxs_vec_x0;
-      n_send = n_send_x0;
-      n_in_buffer = &n_in_buffer_x0;
+      n_send        = n_send_x0;
+      n_in_buffer   = &n_in_buffer_x0;
     }
-    if ( side == 1 ){
+    if (side == 1) {
       out_indxs_vec = &out_indxs_vec_x1;
-      n_send = n_send_x1;
-      n_in_buffer = &n_in_buffer_x1;
+      n_send        = n_send_x1;
+      n_in_buffer   = &n_in_buffer_x1;
     }
   }
-  if ( direction == 1 ){
-    if ( side == 0 ){
+  if (direction == 1) {
+    if (side == 0) {
       out_indxs_vec = &out_indxs_vec_y0;
-      n_send = n_send_y0;
-      n_in_buffer = &n_in_buffer_y0;
+      n_send        = n_send_y0;
+      n_in_buffer   = &n_in_buffer_y0;
     }
-    if ( side == 1 ){
+    if (side == 1) {
       out_indxs_vec = &out_indxs_vec_y1;
-      n_send = n_send_y1;
-      n_in_buffer = &n_in_buffer_y1;
+      n_send        = n_send_y1;
+      n_in_buffer   = &n_in_buffer_y1;
     }
   }
-  if ( direction == 2 ){
-    if ( side == 0 ){
+  if (direction == 2) {
+    if (side == 0) {
       out_indxs_vec = &out_indxs_vec_z0;
-      n_send = n_send_z0;
-      n_in_buffer = &n_in_buffer_z0;
+      n_send        = n_send_z0;
+      n_in_buffer   = &n_in_buffer_z0;
     }
-    if ( side == 1 ){
+    if (side == 1) {
       out_indxs_vec = &out_indxs_vec_z1;
-      n_send = n_send_z1;
-      n_in_buffer = &n_in_buffer_z1;
+      n_send        = n_send_z1;
+      n_in_buffer   = &n_in_buffer_z1;
     }
   }
 
   part_int_t offset, offset_extra;
-  n_out = out_indxs_vec->size();  //Number of particles to be transferred
-  offset = *n_in_buffer*N_DATA_PER_PARTICLE_TRANSFER; //Offset in the array to take in to account the particles that already reside in the buffer array
+  n_out  = out_indxs_vec->size();                        // Number of particles to be transferred
+  offset = *n_in_buffer * N_DATA_PER_PARTICLE_TRANSFER;  // Offset in the array to take in to
+                                                         // account the particles that already
+                                                         // reside in the buffer array
 
   part_int_t indx, pIndx;
-  for ( indx=0; indx<n_out; indx++ ){
-
-    pIndx = (*out_indxs_vec)[indx]; // Index of the particle that will be transferred
-    //Copy the particle data to the buffer array in the following order ( position, velocity )
-    send_buffer[ offset + 0 ] = pos_x[pIndx];
-    send_buffer[ offset + 1 ] = pos_y[pIndx];
-    send_buffer[ offset + 2 ] = pos_z[pIndx];
-    send_buffer[ offset + 3 ] = vel_x[pIndx];
-    send_buffer[ offset + 4 ] = vel_y[pIndx];
-    send_buffer[ offset + 5 ] = vel_z[pIndx];
+  for (indx = 0; indx < n_out; indx++) {
+    pIndx = (*out_indxs_vec)[indx];  // Index of the particle that will be
+                                     // transferred
+    // Copy the particle data to the buffer array in the following order (
+    // position, velocity )
+    send_buffer[offset + 0] = pos_x[pIndx];
+    send_buffer[offset + 1] = pos_y[pIndx];
+    send_buffer[offset + 2] = pos_z[pIndx];
+    send_buffer[offset + 3] = vel_x[pIndx];
+    send_buffer[offset + 4] = vel_y[pIndx];
+    send_buffer[offset + 5] = vel_z[pIndx];
 
     offset_extra = offset + 5;
     #ifndef SINGLE_PARTICLE_MASS
-    //Copy the particle mass to the buffer array in the following order ( position, velocity, mass )
+    // Copy the particle mass to the buffer array in the following order (
+    // position, velocity, mass )
     offset_extra += 1;
-    send_buffer[ offset_extra ] = mass[pIndx];
+    send_buffer[offset_extra] = mass[pIndx];
     #endif
     #ifdef PARTICLE_IDS
-    //Copy the particle mass to the buffer array in the following order ( position, velocity, mass, ID )
+    // Copy the particle mass to the buffer array in the following order (
+    // position, velocity, mass, ID )
     offset_extra += 1;
-    send_buffer[ offset_extra ] = (Real) partIDs[pIndx];
+    send_buffer[offset_extra] = (Real)partIDs[pIndx];
     #endif
     #ifdef PARTICLE_AGE
-    //Copy the particle age to the buffer array in the following order (position, velocity, mass, ID, age)
+    // Copy the particle age to the buffer array in the following order
+    // (position, velocity, mass, ID, age)
     offset_extra += 1;
     send_buffer[offset_extra] = age[pIndx];
     #endif
 
-    *n_in_buffer += 1; // add one to the number of particles in the transfer_buffer
+    *n_in_buffer += 1;  // add one to the number of particles in the transfer_buffer
     offset += N_DATA_PER_PARTICLE_TRANSFER;
-    //Check that the offset doesn't exceed the buffer size
-    if ( offset > buffer_length ) std::cout << "ERROR: Buffer length exceeded on particles transfer" << std::endl;
+    // Check that the offset doesn't exceed the buffer size
+    if (offset > buffer_length) std::cout << "ERROR: Buffer length exceeded on particles transfer" << std::endl;
   }
 }
 
-
-//Add the data of a single particle to a transfer buffer
-void Particles_3D::Add_Particle_To_Buffer( Real *buffer, part_int_t n_in_buffer, int buffer_length, Real pId, Real pMass, Real pAge,
-                            Real pPos_x, Real pPos_y, Real pPos_z, Real pVel_x, Real pVel_y, Real pVel_z){
-
+// Add the data of a single particle to a transfer buffer
+void Particles3D::Add_Particle_To_Buffer(Real *buffer, part_int_t n_in_buffer, int buffer_length, Real pId, Real pMass,
+                                         Real pAge, Real pPos_x, Real pPos_y, Real pPos_z, Real pVel_x, Real pVel_y,
+                                         Real pVel_z)
+{
   int offset, offset_extra;
   offset = n_in_buffer * N_DATA_PER_PARTICLE_TRANSFER;
 
-  if (offset > buffer_length ) std::cout << "ERROR: Buffer length exceeded on particles transfer" << std::endl;
+  if (offset > buffer_length) std::cout << "ERROR: Buffer length exceeded on particles transfer" << std::endl;
   buffer[offset + 0] = pPos_x;
   buffer[offset + 1] = pPos_y;
   buffer[offset + 2] = pPos_z;
@@ -323,78 +333,79 @@ void Particles_3D::Add_Particle_To_Buffer( Real *buffer, part_int_t n_in_buffer,
   buffer[offset + 5] = pVel_z;
 
   offset_extra = offset + 5;
-  #ifndef SINGLE_PARTICLE_MASS
+    #ifndef SINGLE_PARTICLE_MASS
   offset_extra += 1;
-  buffer[ offset_extra ] = pMass;
-  #endif
-  #ifdef PARTICLE_IDS
+  buffer[offset_extra] = pMass;
+    #endif
+    #ifdef PARTICLE_IDS
   offset_extra += 1;
   buffer[offset_extra] = pId;
-  #endif
-  #ifdef PARTICLE_AGE
+    #endif
+    #ifdef PARTICLE_AGE
   offset_extra += 1;
   buffer[offset_extra] = pAge;
-  #endif
+    #endif
 }
 
-
-//After a particle was transferred, add the transferred particle data to the vectors that contain the data of the local particles
-void Particles_3D::Add_Particle_To_Vectors( Real pId, Real pMass, Real pAge,
-                            Real pPos_x, Real pPos_y, Real pPos_z,
-                            Real pVel_x, Real pVel_y, Real pVel_z, int *flags ){
-
+// After a particle was transferred, add the transferred particle data to the
+// vectors that contain the data of the local particles
+void Particles3D::Add_Particle_To_Vectors(Real pId, Real pMass, Real pAge, Real pPos_x, Real pPos_y, Real pPos_z,
+                                          Real pVel_x, Real pVel_y, Real pVel_z, int *flags)
+{
   // Make sure that the particle position is inside the local domain
   bool in_local = true;
-  if ( pPos_x < G.xMin || pPos_x >= G.xMax ) in_local = false;
-  if ( ( pPos_y < G.yMin && flags[2]==5 ) || ( pPos_y >= G.yMax && flags[3]==5 ) ) in_local = false;
-  if ( ( pPos_z < G.zMin && flags[4]==5 ) || ( pPos_z >= G.zMax && flags[4]==5 ) ) in_local = false;
-  if ( ! in_local  ) {
+  if (pPos_x < G.xMin || pPos_x >= G.xMax) in_local = false;
+  if ((pPos_y < G.yMin && flags[2] == 5) || (pPos_y >= G.yMax && flags[3] == 5)) in_local = false;
+  if ((pPos_z < G.zMin && flags[4] == 5) || (pPos_z >= G.zMax && flags[4] == 5)) in_local = false;
+  if (!in_local) {
     std::cout << " Adding particle out of local domain to vectors Error:" << std::endl;
     #ifdef PARTICLE_IDS
     std::cout << " Particle outside Local  domain    pID: " << pId << std::endl;
     #else
     std::cout << " Particle outside Local  domain " << std::endl;
     #endif
-    std::cout << "  Domain X: " << G.xMin <<  "  " << G.xMax << std::endl;
-    std::cout << "  Domain Y: " << G.yMin <<  "  " << G.yMax << std::endl;
-    std::cout << "  Domain Z: " << G.zMin <<  "  " << G.zMax << std::endl;
+    std::cout << "  Domain X: " << G.xMin << "  " << G.xMax << std::endl;
+    std::cout << "  Domain Y: " << G.yMin << "  " << G.yMax << std::endl;
+    std::cout << "  Domain Z: " << G.zMin << "  " << G.zMax << std::endl;
     std::cout << "  Particle X: " << pPos_x << std::endl;
     std::cout << "  Particle Y: " << pPos_y << std::endl;
     std::cout << "  Particle Z: " << pPos_z << std::endl;
   }
-  //TODO: is it good enough to log the error (but then go ahead and add it to the vector)?
-
-  //Append the particle data to the local data vectors
-  pos_x.push_back( pPos_x );
-  pos_y.push_back( pPos_y );
-  pos_z.push_back( pPos_z );
-  vel_x.push_back( pVel_x );
-  vel_y.push_back( pVel_y );
-  vel_z.push_back( pVel_z );
-  #ifndef SINGLE_PARTICLE_MASS
-  mass.push_back( pMass );
-  #endif
-  #ifdef PARTICLE_IDS
-  partIDs.push_back( Real_to_part_int(pId) );
-  #endif
-  #ifdef PARTICLE_AGE
+  // TODO: is it good enough to log the error (but then go ahead and add it to
+  // the vector)?
+
+  // Append the particle data to the local data vectors
+  pos_x.push_back(pPos_x);
+  pos_y.push_back(pPos_y);
+  pos_z.push_back(pPos_z);
+  vel_x.push_back(pVel_x);
+  vel_y.push_back(pVel_y);
+  vel_z.push_back(pVel_z);
+    #ifndef SINGLE_PARTICLE_MASS
+  mass.push_back(pMass);
+    #endif
+    #ifdef PARTICLE_IDS
+  partIDs.push_back(Real_to_part_int(pId));
+    #endif
+    #ifdef PARTICLE_AGE
   age.push_back(pAge);
-  #endif
+    #endif
   grav_x.push_back(0);
   grav_y.push_back(0);
   grav_z.push_back(0);
 
-  //Add one to the local number of particles
+  // Add one to the local number of particles
   n_local += 1;
 }
 
-
-
-//After the MPI transfer, unload the particles data from the buffers
-void Particles_3D::Unload_Particles_from_Buffer_CPU( int direction, int side, Real *recv_buffer, part_int_t n_recv,
-      Real *send_buffer_y0, Real *send_buffer_y1, Real *send_buffer_z0, Real *send_buffer_z1, int buffer_length_y0, int buffer_length_y1, int buffer_length_z0, int buffer_length_z1, int *flags){
-
-  //Loop over the data in the recv_buffer, get the data for each particle and append the particle data to the local vecors
+// After the MPI transfer, unload the particles data from the buffers
+void Particles3D::Unload_Particles_from_Buffer_CPU(int direction, int side, Real *recv_buffer, part_int_t n_recv,
+                                                   Real *send_buffer_y0, Real *send_buffer_y1, Real *send_buffer_z0,
+                                                   Real *send_buffer_z1, int buffer_length_y0, int buffer_length_y1,
+                                                   int buffer_length_z0, int buffer_length_z1, int *flags)
+{
+  // Loop over the data in the recv_buffer, get the data for each particle and
+  // append the particle data to the local vecors
 
   int offset_buff, offset_extra;
   part_int_t pId;
@@ -402,25 +413,25 @@ void Particles_3D::Unload_Particles_from_Buffer_CPU( int direction, int side, Re
 
   offset_buff = 0;
   part_int_t indx;
-  for ( indx=0; indx<n_recv; indx++ ){
-    //Get the data for each transferred particle
-    pPos_x = recv_buffer[ offset_buff + 0 ];
-    pPos_y = recv_buffer[ offset_buff + 1 ];
-    pPos_z = recv_buffer[ offset_buff + 2 ];
-    pVel_x = recv_buffer[ offset_buff + 3 ];
-    pVel_y = recv_buffer[ offset_buff + 4 ];
-    pVel_z = recv_buffer[ offset_buff + 5 ];
+  for (indx = 0; indx < n_recv; indx++) {
+    // Get the data for each transferred particle
+    pPos_x = recv_buffer[offset_buff + 0];
+    pPos_y = recv_buffer[offset_buff + 1];
+    pPos_z = recv_buffer[offset_buff + 2];
+    pVel_x = recv_buffer[offset_buff + 3];
+    pVel_y = recv_buffer[offset_buff + 4];
+    pVel_z = recv_buffer[offset_buff + 5];
 
     offset_extra = offset_buff + 5;
     #if SINGLE_PARTICLE_MASS
     pMass = particle_mass;
     #else
     offset_extra += 1;
-    pMass  = recv_buffer[ offset_extra ];
+    pMass = recv_buffer[offset_extra];
     #endif
     #ifdef PARTICLE_IDS
     offset_extra += 1;
-    pId    = recv_buffer[ offset_extra ];
+    pId = recv_buffer[offset_extra];
     #else
     pId = 0;
     #endif
@@ -433,17 +444,17 @@ void Particles_3D::Unload_Particles_from_Buffer_CPU( int direction, int side, Re
 
     offset_buff += N_DATA_PER_PARTICLE_TRANSFER;
 
-    //GLOBAL PERIODIC BOUNDARIES: for the X direction
-    if ( pPos_x <  G.domainMin_x ) pPos_x += ( G.domainMax_x - G.domainMin_x );
-    if ( pPos_x >= G.domainMax_x ) pPos_x -= ( G.domainMax_x - G.domainMin_x );
+    // GLOBAL PERIODIC BOUNDARIES: for the X direction
+    if (pPos_x < G.domainMin_x) pPos_x += (G.domainMax_x - G.domainMin_x);
+    if (pPos_x >= G.domainMax_x) pPos_x -= (G.domainMax_x - G.domainMin_x);
 
-    //If the particle x_position is outside the local domain there was an error
-    if ( ( pPos_x < G.xMin ) || ( pPos_x >= G.xMax )  ){
-      #ifdef PARTICLE_IDS
+    // If the particle x_position is outside the local domain there was an error
+    if ((pPos_x < G.xMin) || (pPos_x >= G.xMax)) {
+    #ifdef PARTICLE_IDS
       std::cout << "ERROR Particle Transfer out of X domain    pID: " << pId << std::endl;
-      #else
+    #else
       std::cout << "ERROR Particle Transfer out of X domain" << std::endl;
-      #endif
+    #endif
       std::cout << " posX: " << pPos_x << " velX: " << pVel_x << std::endl;
       std::cout << " posY: " << pPos_y << " velY: " << pVel_y << std::endl;
       std::cout << " posZ: " << pPos_z << " velZ: " << pVel_z << std::endl;
@@ -453,35 +464,39 @@ void Particles_3D::Unload_Particles_from_Buffer_CPU( int direction, int side, Re
       continue;
     }
 
-    // If the y_position at the X_Tansfer (direction=0) is outside the local domain, then the particles is added to the buffer for the Y_Transfer
-    if (direction  == 0 ){
-      if ( pPos_y < G.yMin  && flags[2]==5  ){
-        Add_Particle_To_Buffer( send_buffer_y0, n_in_buffer_y0, buffer_length_y0, pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, pVel_y, pVel_z );
+    // If the y_position at the X_Tansfer (direction=0) is outside the local
+    // domain, then the particles is added to the buffer for the Y_Transfer
+    if (direction == 0) {
+      if (pPos_y < G.yMin && flags[2] == 5) {
+        Add_Particle_To_Buffer(send_buffer_y0, n_in_buffer_y0, buffer_length_y0, pId, pMass, pAge, pPos_x, pPos_y,
+                               pPos_z, pVel_x, pVel_y, pVel_z);
         n_send_y0 += 1;
         n_in_buffer_y0 += 1;
         continue;
       }
-      if ( pPos_y >= G.yMax && flags[3]==5 ){
-        Add_Particle_To_Buffer( send_buffer_y1, n_in_buffer_y1, buffer_length_y1, pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, pVel_y, pVel_z );
+      if (pPos_y >= G.yMax && flags[3] == 5) {
+        Add_Particle_To_Buffer(send_buffer_y1, n_in_buffer_y1, buffer_length_y1, pId, pMass, pAge, pPos_x, pPos_y,
+                               pPos_z, pVel_x, pVel_y, pVel_z);
         n_send_y1 += 1;
         n_in_buffer_y1 += 1;
         continue;
       }
     }
 
-    //PERIODIC BOUNDARIES: for the Y direction
-    if (  direction == 1 ){
-      if ( pPos_y <  G.domainMin_y ) pPos_y += ( G.domainMax_y - G.domainMin_y );
-      if ( pPos_y >= G.domainMax_y ) pPos_y -= ( G.domainMax_y - G.domainMin_y );
+    // PERIODIC BOUNDARIES: for the Y direction
+    if (direction == 1) {
+      if (pPos_y < G.domainMin_y) pPos_y += (G.domainMax_y - G.domainMin_y);
+      if (pPos_y >= G.domainMax_y) pPos_y -= (G.domainMax_y - G.domainMin_y);
     }
 
-    //If the particle y_position is outside the local domain after the X-Transfer, there was an error
-    if ( (direction==1 || direction==2) && (( pPos_y < G.yMin ) || ( pPos_y >= G.yMax ))  ){
-      #ifdef PARTICLE_IDS
+    // If the particle y_position is outside the local domain after the
+    // X-Transfer, there was an error
+    if ((direction == 1 || direction == 2) && ((pPos_y < G.yMin) || (pPos_y >= G.yMax))) {
+    #ifdef PARTICLE_IDS
       std::cout << "ERROR Particle Transfer out of Y domain    pID: " << pId << std::endl;
-      #else
+    #else
       std::cout << "ERROR Particle Transfer out of Y domain" << std::endl;
-      #endif
+    #endif
       std::cout << " posX: " << pPos_x << " velX: " << pVel_x << std::endl;
       std::cout << " posY: " << pPos_y << " velY: " << pVel_y << std::endl;
       std::cout << " posZ: " << pPos_z << " velZ: " << pVel_z << std::endl;
@@ -491,35 +506,39 @@ void Particles_3D::Unload_Particles_from_Buffer_CPU( int direction, int side, Re
       continue;
     }
 
-    // If the z_position at the X_Tansfer or Y_Transfer is outside the local domain, then the particles is added to the buffer for the Z_Transfer
-    if (direction  !=2 ){
-      if ( pPos_z < G.zMin && flags[4]==5 ){
-        Add_Particle_To_Buffer( send_buffer_z0, n_in_buffer_z0, buffer_length_z0, pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, pVel_y, pVel_z );
+    // If the z_position at the X_Tansfer or Y_Transfer is outside the local
+    // domain, then the particles is added to the buffer for the Z_Transfer
+    if (direction != 2) {
+      if (pPos_z < G.zMin && flags[4] == 5) {
+        Add_Particle_To_Buffer(send_buffer_z0, n_in_buffer_z0, buffer_length_z0, pId, pMass, pAge, pPos_x, pPos_y,
+                               pPos_z, pVel_x, pVel_y, pVel_z);
         n_send_z0 += 1;
         n_in_buffer_z0 += 1;
         continue;
       }
-      if ( pPos_z >= G.zMax && flags[5]==5  ){
-        Add_Particle_To_Buffer( send_buffer_z1, n_in_buffer_z1, buffer_length_z1, pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, pVel_y, pVel_z );
+      if (pPos_z >= G.zMax && flags[5] == 5) {
+        Add_Particle_To_Buffer(send_buffer_z1, n_in_buffer_z1, buffer_length_z1, pId, pMass, pAge, pPos_x, pPos_y,
+                               pPos_z, pVel_x, pVel_y, pVel_z);
         n_send_z1 += 1;
         n_in_buffer_z1 += 1;
         continue;
       }
     }
 
-    //GLOBAL PERIODIC BOUNDARIES: for the Z direction
-    if (  direction == 2 ){
-      if ( pPos_z <  G.domainMin_z ) pPos_z += ( G.domainMax_z - G.domainMin_z );
-      if ( pPos_z >= G.domainMax_z ) pPos_z -= ( G.domainMax_z - G.domainMin_z );
+    // GLOBAL PERIODIC BOUNDARIES: for the Z direction
+    if (direction == 2) {
+      if (pPos_z < G.domainMin_z) pPos_z += (G.domainMax_z - G.domainMin_z);
+      if (pPos_z >= G.domainMax_z) pPos_z -= (G.domainMax_z - G.domainMin_z);
     }
 
-    //If the particle z_position is outside the local domain after the X-Transfer and Y-Transfer, there was an error
-    if ( (direction==2) && (( pPos_z < G.zMin ) || ( pPos_z >= G.zMax ))  ){
-      #ifdef PARTICLE_IDS
+    // If the particle z_position is outside the local domain after the
+    // X-Transfer and Y-Transfer, there was an error
+    if ((direction == 2) && ((pPos_z < G.zMin) || (pPos_z >= G.zMax))) {
+    #ifdef PARTICLE_IDS
       std::cout << "ERROR Particle Transfer out of Z domain    pID: " << pId << std::endl;
-      #else
+    #else
       std::cout << "ERROR Particle Transfer out of Z domain" << std::endl;
-      #endif
+    #endif
       std::cout << " posX: " << pPos_x << " velX: " << pVel_x << std::endl;
       std::cout << " posY: " << pPos_y << " velY: " << pVel_y << std::endl;
       std::cout << " posZ: " << pPos_z << " velZ: " << pVel_z << std::endl;
@@ -529,16 +548,16 @@ void Particles_3D::Unload_Particles_from_Buffer_CPU( int direction, int side, Re
       continue;
     }
 
-    //If the particle doesn't have to be transferred to the y_direction or z_direction, then add the particle date to the local vectors
-    Add_Particle_To_Vectors( pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, pVel_y, pVel_z, flags );
+    // If the particle doesn't have to be transferred to the y_direction or
+    // z_direction, then add the particle date to the local vectors
+    Add_Particle_To_Vectors(pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, pVel_y, pVel_z, flags);
   }
 }
 
-
-//Remove the particles that were transferred outside the local domain
-void Particles_3D::Remove_Transfered_Particles( void ){
-
-  //Get the number of particles to delete
+// Remove the particles that were transferred outside the local domain
+void Particles3D::Remove_Transfered_Particles(void)
+{
+  // Get the number of particles to delete
   part_int_t n_delete = 0;
   n_delete += out_indxs_vec_x0.size();
   n_delete += out_indxs_vec_x1.size();
@@ -548,16 +567,18 @@ void Particles_3D::Remove_Transfered_Particles( void ){
   n_delete += out_indxs_vec_z1.size();
   // std::cout << "N to delete: " << n_delete << std::endl;
 
-  //Concatenate the indices of all the particles that moved into a new vector (delete_indxs_vec)
+  // Concatenate the indices of all the particles that moved into a new vector
+  // (delete_indxs_vec)
   int_vector_t delete_indxs_vec;
-  delete_indxs_vec.insert( delete_indxs_vec.end(), out_indxs_vec_x0.begin(), out_indxs_vec_x0.end() );
-  delete_indxs_vec.insert( delete_indxs_vec.end(), out_indxs_vec_x1.begin(), out_indxs_vec_x1.end() );
-  delete_indxs_vec.insert( delete_indxs_vec.end(), out_indxs_vec_y0.begin(), out_indxs_vec_y0.end() );
-  delete_indxs_vec.insert( delete_indxs_vec.end(), out_indxs_vec_y1.begin(), out_indxs_vec_y1.end() );
-  delete_indxs_vec.insert( delete_indxs_vec.end(), out_indxs_vec_z0.begin(), out_indxs_vec_z0.end() );
-  delete_indxs_vec.insert( delete_indxs_vec.end(), out_indxs_vec_z1.begin(), out_indxs_vec_z1.end() );
-
-  //Clear the vectors that stored the transferred indices for each direction. All these indices are now stored in delete_indxs_vec
+  delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_x0.begin(), out_indxs_vec_x0.end());
+  delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_x1.begin(), out_indxs_vec_x1.end());
+  delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_y0.begin(), out_indxs_vec_y0.end());
+  delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_y1.begin(), out_indxs_vec_y1.end());
+  delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_z0.begin(), out_indxs_vec_z0.end());
+  delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_z1.begin(), out_indxs_vec_z1.end());
+
+  // Clear the vectors that stored the transferred indices for each direction.
+  // All these indices are now stored in delete_indxs_vec
   out_indxs_vec_x0.clear();
   out_indxs_vec_x1.clear();
   out_indxs_vec_y0.clear();
@@ -565,63 +586,66 @@ void Particles_3D::Remove_Transfered_Particles( void ){
   out_indxs_vec_z0.clear();
   out_indxs_vec_z1.clear();
 
-  //Sort the indices that need to be deleted so that the particles are deleted from right to left
+  // Sort the indices that need to be deleted so that the particles are deleted
+  // from right to left
   std::sort(delete_indxs_vec.begin(), delete_indxs_vec.end());
 
   part_int_t indx, pIndx;
-  for ( indx=0; indx<n_delete; indx++ ){
-    //From right to left get the index of the particle that will be deleted
+  for (indx = 0; indx < n_delete; indx++) {
+    // From right to left get the index of the particle that will be deleted
     pIndx = delete_indxs_vec.back();
-    //Remove the particle data at the selected index
-    Remove_Real( pIndx, pos_x );
-    Remove_Real( pIndx, pos_y );
-    Remove_Real( pIndx, pos_z );
-    Remove_Real( pIndx, vel_x );
-    Remove_Real( pIndx, vel_y );
-    Remove_Real( pIndx, vel_z );
-    Remove_Real( pIndx, grav_x );
-    Remove_Real( pIndx, grav_y );
-    Remove_Real( pIndx, grav_z );
+    // Remove the particle data at the selected index
+    Remove_Real(pIndx, pos_x);
+    Remove_Real(pIndx, pos_y);
+    Remove_Real(pIndx, pos_z);
+    Remove_Real(pIndx, vel_x);
+    Remove_Real(pIndx, vel_y);
+    Remove_Real(pIndx, vel_z);
+    Remove_Real(pIndx, grav_x);
+    Remove_Real(pIndx, grav_y);
+    Remove_Real(pIndx, grav_z);
     #ifdef PARTICLE_IDS
-    Remove_ID( pIndx, partIDs );
+    Remove_ID(pIndx, partIDs);
     #endif
     #ifndef SINGLE_PARTICLE_MASS
-    Remove_Real( pIndx, mass );
+    Remove_Real(pIndx, mass);
     #endif
     #ifdef PARTICLE_AGE
     Remove_Real(pIndx, age);
     #endif
 
-    delete_indxs_vec.pop_back(); //Discard the index of ther delted particle from the delete_indxs_vector
-    n_local -= 1; //substract one to the local number of particles
+    delete_indxs_vec.pop_back();  // Discard the index of ther delted particle
+                                  // from the delete_indxs_vector
+    n_local -= 1;                 // substract one to the local number of particles
   }
 
-  //At the end the delete_indxs_vec must be empty
-  if ( delete_indxs_vec.size() != 0 ) std::cout << "ERROR: Deleting Transferred Particles " << std::endl;
+  // At the end the delete_indxs_vec must be empty
+  if (delete_indxs_vec.size() != 0) std::cout << "ERROR: Deleting Transferred Particles " << std::endl;
 
-
-  //Check that the size of the particles data vectors is consistent with the local number of particles
+  // Check that the size of the particles data vectors is consistent with the
+  // local number of particles
   int n_in_out_vectors, n_in_vectors;
-  n_in_vectors =  pos_x.size() + pos_y.size() + pos_z.size() + vel_x.size() + vel_y.size() + vel_z.size() ;
-  #ifndef SINGLE_PARTICLE_MASS
+  n_in_vectors = pos_x.size() + pos_y.size() + pos_z.size() + vel_x.size() + vel_y.size() + vel_z.size();
+    #ifndef SINGLE_PARTICLE_MASS
   n_in_vectors += mass.size();
-  #endif
-  #ifdef PARTICLE_IDS
+    #endif
+    #ifdef PARTICLE_IDS
   n_in_vectors += partIDs.size();
-  #endif
-  #ifdef PARTICLE_AGE
+    #endif
+    #ifdef PARTICLE_AGE
   n_in_vectors += age.size();
-  #endif
+    #endif
 
-  if ( n_in_vectors != n_local * N_DATA_PER_PARTICLE_TRANSFER ){
-    std::cout << "ERROR PARTICLES TRANSFER: DATA IN VECTORS DIFFERENT FROM N_LOCAL###########" << std::endl;
+  if (n_in_vectors != n_local * N_DATA_PER_PARTICLE_TRANSFER) {
+    std::cout << "ERROR PARTICLES TRANSFER: DATA IN VECTORS DIFFERENT FROM "
+                 "N_LOCAL###########"
+              << std::endl;
     exit(-1);
   }
 }
 
-
-
-void Particles_3D::Clear_Vectors_For_Transfers( void ){
+void Particles3D::Clear_Vectors_For_Transfers(void)
+{
   out_indxs_vec_x0.clear();
   out_indxs_vec_x1.clear();
   out_indxs_vec_y0.clear();
@@ -630,8 +654,5 @@ void Particles_3D::Clear_Vectors_For_Transfers( void ){
   out_indxs_vec_z1.clear();
 }
 
-
-
-
-#endif //MPI_CHOLLA
-#endif //PARTICLES
+  #endif  // MPI_CHOLLA
+#endif    // PARTICLES
diff --git a/src/particles/particles_boundaries_gpu.cu b/src/particles/particles_boundaries_gpu.cu
index c51193e35..5f8165be3 100644
--- a/src/particles/particles_boundaries_gpu.cu
+++ b/src/particles/particles_boundaries_gpu.cu
@@ -1,146 +1,152 @@
 #if defined(PARTICLES) && defined(PARTICLES_GPU)
 
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include "../utils/gpu.hpp"
-#include <iostream>
-#include "../io/io.h"
-#include "../global/global.h"
-#include "../global/global_cuda.h"
-#include "../particles/particles_3D.h"
-#include "../grid/grid3D.h"
-#include "../particles/particles_boundaries_gpu.h"
-
-#define SCAN_SHARED_SIZE 2*TPB_PARTICLES
-
-
-__global__ void Set_Particles_Boundary_Kernel( int side, part_int_t n_local,  Real *pos_dev, Real d_min, Real d_max, Real d_length ){
-
-  part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x ;
-  if ( tid >= n_local) return;
+  #include <math.h>
+  #include <stdio.h>
+  #include <stdlib.h>
+  #include <unistd.h>
+
+  #include <iostream>
+
+  #include "../global/global.h"
+  #include "../global/global_cuda.h"
+  #include "../grid/grid3D.h"
+  #include "../io/io.h"
+  #include "../utils/gpu.hpp"
+  #include "particles_3D.h"
+  #include "particles_boundaries_gpu.h"
+
+  #define SCAN_SHARED_SIZE (2 * TPB_PARTICLES)
+
+__global__ void Set_Particles_Boundary_Kernel(int side, part_int_t n_local, Real *pos_dev, Real d_min, Real d_max,
+                                              Real d_length)
+{
+  part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x;
+  if (tid >= n_local) {
+    return;
+  }
 
   Real pos;
   pos = pos_dev[tid];
 
-  if ( side == 0 ){
-    if ( pos < d_min ) pos += d_length;
+  if (side == 0) {
+    if (pos < d_min) {
+      pos += d_length;
+    }
   }
 
-  if ( side == 1 ){
-    if ( pos >= d_max ) pos -= d_length;
+  if (side == 1) {
+    if (pos >= d_max) {
+      pos -= d_length;
+    }
   }
 
   pos_dev[tid] = pos;
-
 }
 
-
-void Grid3D::Set_Particles_Boundary_GPU( int dir, int side ){
-
+void Grid3D::Set_Particles_Boundary_GPU(int dir, int side)
+{
   Real d_min, d_max, L;
   Real *pos_dev;
-  if ( dir == 0 ){
-    d_min = Particles.G.zMin;
-    d_max = Particles.G.zMax;
+  if (dir == 0) {
+    d_min   = Particles.G.zMin;
+    d_max   = Particles.G.zMax;
     pos_dev = Particles.pos_x_dev;
   }
-  if ( dir == 1 ){
-    d_min = Particles.G.yMin;
-    d_max = Particles.G.yMax;
+  if (dir == 1) {
+    d_min   = Particles.G.yMin;
+    d_max   = Particles.G.yMax;
     pos_dev = Particles.pos_y_dev;
   }
-  if ( dir == 2 ){
-    d_min = Particles.G.zMin;
-    d_max = Particles.G.zMax;
+  if (dir == 2) {
+    d_min   = Particles.G.zMin;
+    d_max   = Particles.G.zMax;
     pos_dev = Particles.pos_z_dev;
   }
 
   L = d_max - d_min;
 
   // set values for GPU kernels
-  int grid_size =  (Particles.n_local - 1) / TPB_PARTICLES + 1;
+  int grid_size = (Particles.n_local - 1) / TPB_PARTICLES + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(grid_size, 1, 1);
   //  number of threads per 1D block
   dim3 dim1dBlock(TPB_PARTICLES, 1, 1);
 
-  hipLaunchKernelGGL(Set_Particles_Boundary_Kernel, dim1dGrid, dim1dBlock, 0, 0,  side, Particles.n_local, pos_dev, d_min, d_max, L  );
-  CudaCheckError();
+  hipLaunchKernelGGL(Set_Particles_Boundary_Kernel, dim1dGrid, dim1dBlock, 0, 0, side, Particles.n_local, pos_dev,
+                     d_min, d_max, L);
+  GPU_Error_Check();
 }
 
-
 // #ifdef MPI_CHOLLA
 
-__global__ void Get_Transfer_Flags_Kernel( part_int_t n_total, int side,  Real d_min, Real d_max, Real *pos_d, bool *transfer_flags_d ){
-
+__global__ void Get_Transfer_Flags_Kernel(part_int_t n_total, int side, Real d_min, Real d_max, Real *pos_d,
+                                          bool *transfer_flags_d)
+{
   int tid = threadIdx.x + blockIdx.x * blockDim.x;
-  if ( tid >= n_total ) return;
+  if (tid >= n_total) {
+    return;
+  }
 
-  bool transfer = 0;
+  bool transfer = false;
 
   Real pos = pos_d[tid];
-  // if ( tid < 1 ) printf( "%f\n", pos);
 
-  if ( side == 0 ){
-    if ( pos < d_min ) transfer = 1;
+  if (side == 0 && pos < d_min) {
+    transfer = true;
   }
-
-  if ( side == 1 ){
-    if ( pos >= d_max ) transfer = 1;
+  if (side == 1 && pos >= d_max) {
+    transfer = true;
   }
 
-  // if ( transfer ) printf( "##Thread particles transfer\n");
   transfer_flags_d[tid] = transfer;
 }
 
-
-__global__ void Scan_Kernel( part_int_t n_total, bool *transfer_flags_d, int *prefix_sum_d, int *prefix_sum_block_d ){
-
+__global__ void Scan_Kernel(part_int_t n_total, bool *transfer_flags_d, int *prefix_sum_d, int *prefix_sum_block_d)
+{
   __shared__ int data_sh[SCAN_SHARED_SIZE];
 
   int tid_block, block_start;
   // tid = threadIdx.x + blockIdx.x * blockDim.x;
   tid_block = threadIdx.x;
 
-  block_start = 2*blockIdx.x*blockDim.x;
+  block_start = 2 * blockIdx.x * blockDim.x;
 
-  data_sh[2*tid_block] = block_start + 2*tid_block < n_total ? (int) transfer_flags_d[block_start + 2*tid_block]  :  0;
-  data_sh[2*tid_block+1] = block_start + 2*tid_block+1 < n_total ?  (int) transfer_flags_d[block_start + 2*tid_block+1]  :  0;
+  data_sh[2 * tid_block] =
+      block_start + 2 * tid_block < n_total ? (int)transfer_flags_d[block_start + 2 * tid_block] : 0;
+  data_sh[2 * tid_block + 1] =
+      block_start + 2 * tid_block + 1 < n_total ? (int)transfer_flags_d[block_start + 2 * tid_block + 1] : 0;
   __syncthreads();
 
   int offset = 1;
-  int n = blockDim.x*2;
+  int n      = blockDim.x * 2;
 
   int ai, bi;
   int t;
 
-  for (int d = n/2; d>0; d/=2){
-
+  for (int d = n / 2; d > 0; d /= 2) {
     __syncthreads();
-    if ( tid_block < d ){
-      ai = offset*(2*tid_block+1)-1;
-      bi = offset*(2*tid_block+2)-1;
+    if (tid_block < d) {
+      ai = offset * (2 * tid_block + 1) - 1;
+      bi = offset * (2 * tid_block + 2) - 1;
       data_sh[bi] += data_sh[ai];
     }
     offset *= 2;
   }
 
   // Clear the last element
-  if (tid_block == 0) data_sh[n - 1] = 0;
+  if (tid_block == 0) {
+    data_sh[n - 1] = 0;
+  }
 
   // Traverse down tree & build scan
-  for (int d = 1; d < n; d *= 2){
-
+  for (int d = 1; d < n; d *= 2) {
     __syncthreads();
-    offset /=2;
-    if (tid_block < d){
-
-      ai = offset*(2*tid_block+1)-1;
-      bi = offset*(2*tid_block+2)-1;
+    offset /= 2;
+    if (tid_block < d) {
+      ai = offset * (2 * tid_block + 1) - 1;
+      bi = offset * (2 * tid_block + 2) - 1;
 
-      t = data_sh[ai];
+      t           = data_sh[ai];
       data_sh[ai] = data_sh[bi];
       data_sh[bi] += t;
     }
@@ -148,154 +154,190 @@ __global__ void Scan_Kernel( part_int_t n_total, bool *transfer_flags_d, int *pr
   __syncthreads();
 
   // Write results to device memory
-  if ( block_start + 2*tid_block < n_total )  prefix_sum_d[block_start + 2*tid_block] = data_sh[2*tid_block];
-  if ( block_start + 2*tid_block+1 < n_total) prefix_sum_d[block_start + 2*tid_block+1] = data_sh[2*tid_block+1];
+  if (block_start + 2 * tid_block < n_total) {
+    prefix_sum_d[block_start + 2 * tid_block] = data_sh[2 * tid_block];
+  }
+  if (block_start + 2 * tid_block + 1 < n_total) {
+    prefix_sum_d[block_start + 2 * tid_block + 1] = data_sh[2 * tid_block + 1];
+  }
 
   // Write the block sum
-  int last_flag_block = (int) transfer_flags_d[block_start + 2*(blockDim.x-1)+1];
-  if (tid_block == 0) prefix_sum_block_d[blockIdx.x] = data_sh[2*(blockDim.x-1)+1] + last_flag_block;
+  int last_flag_block = (int)transfer_flags_d[block_start + 2 * (blockDim.x - 1) + 1];
+  if (tid_block == 0) {
+    prefix_sum_block_d[blockIdx.x] = data_sh[2 * (blockDim.x - 1) + 1] + last_flag_block;
+  }
 }
 
-
-__global__ void Prefix_Sum_Blocks_Kernel( int n_partial, int *prefix_sum_block_d ){
-
-  int tid_block, val,  start_index, n_threads;
+__global__ void Prefix_Sum_Blocks_Kernel(int n_partial, int *prefix_sum_block_d)
+{
+  int tid_block, val, start_index, n_threads;
   tid_block = threadIdx.x;
   n_threads = blockDim.x;
 
   __shared__ int data_sh[TPB_PARTICLES];
 
-
-  int sum = 0;
-  int n = 0;
+  int sum     = 0;
+  int n       = 0;
   start_index = n * n_threads;
-  while( start_index < n_partial ){
-    data_sh[tid_block] = start_index+tid_block < n_partial  ?  prefix_sum_block_d[start_index+tid_block] :  0;
+  while (start_index < n_partial) {
+    data_sh[tid_block] = start_index + tid_block < n_partial ? prefix_sum_block_d[start_index + tid_block] : 0;
     __syncthreads();
 
-
-    if (tid_block == 0){
-      for ( int i=0; i<n_threads; i++ ){
-        val = data_sh[i];
+    if (tid_block == 0) {
+      for (int i = 0; i < n_threads; i++) {
+        val        = data_sh[i];
         data_sh[i] = sum;
         sum += val;
       }
     }
     __syncthreads();
 
-    if (start_index + tid_block < n_partial) prefix_sum_block_d[start_index+tid_block] = data_sh[tid_block];
+    if (start_index + tid_block < n_partial) {
+      prefix_sum_block_d[start_index + tid_block] = data_sh[tid_block];
+    }
     n += 1;
     start_index = n * n_threads;
-
   }
 }
 
-
-__global__ void Sum_Blocks_Kernel( part_int_t n_total,  int *prefix_sum_d, int *prefix_sum_block_d ){
-
+__global__ void Sum_Blocks_Kernel(part_int_t n_total, int *prefix_sum_d, int *prefix_sum_block_d)
+{
   int tid, tid_block, block_id, data_id;
-  tid = threadIdx.x + blockIdx.x * blockDim.x;
+  tid       = threadIdx.x + blockIdx.x * blockDim.x;
   tid_block = threadIdx.x;
-  block_id = blockIdx.x;
-  data_id = block_id/2;
+  block_id  = blockIdx.x;
+  data_id   = block_id / 2;
 
   __shared__ int block_sum_sh[1];
 
-  if ( tid_block == 0 ){
+  if (tid_block == 0) {
     block_sum_sh[0] = prefix_sum_block_d[data_id];
     // printf( "%d   %d\n",  block_id/2, prefix_sum_block[data_id] );
   }
   __syncthreads();
 
-  if (tid < n_total) prefix_sum_d[tid] += block_sum_sh[0];
+  if (tid < n_total) {
+    prefix_sum_d[tid] += block_sum_sh[0];
+  }
 }
 
-
-__global__ void Get_N_Transfer_Particles_Kernel( part_int_t n_total, int *n_transfer_d, bool *transfer_flags_d, int *prefix_sum_d  ){
-  n_transfer_d[0] = prefix_sum_d[n_total-1] + (int)transfer_flags_d[n_total-1];
-  // if ( n_transfer_d[0] > 0 ) printf( "##Thread transfer: %d\n", n_transfer_d[0]);
+__global__ void Get_N_Transfer_Particles_Kernel(part_int_t n_total, int *n_transfer_d, bool *transfer_flags_d,
+                                                int *prefix_sum_d)
+{
+  n_transfer_d[0] = prefix_sum_d[n_total - 1] + (int)transfer_flags_d[n_total - 1];
+  // if ( n_transfer_d[0] > 0 ) printf( "##Thread transfer: %d\n",
+  // n_transfer_d[0]);
 }
 
-__global__ void Get_Transfer_Indices_Kernel( part_int_t n_total, bool *transfer_flags_d, int *prefix_sum_d, int *transfer_indices_d ){
-
+__global__ void Get_Transfer_Indices_Kernel(part_int_t n_total, bool *transfer_flags_d, int *prefix_sum_d,
+                                            int *transfer_indices_d)
+{
   int tid, transfer_index;
-  tid =  threadIdx.x + blockIdx.x * blockDim.x;
-  if ( tid >= n_total ) return;
+  tid = threadIdx.x + blockIdx.x * blockDim.x;
+  if (tid >= n_total) {
+    return;
+  }
   transfer_index = prefix_sum_d[tid];
-  
-  if ( transfer_index < 0 || transfer_index >= n_total ){
-    printf( "#### PARTICLE TRANSFER ERROR:  transfer index outside domain: %d \n", transfer_index  ); 
+
+  if (transfer_index < 0 || transfer_index >= n_total) {
+    printf("#### PARTICLE TRANSFER ERROR:  transfer index outside domain: %d \n", transfer_index);
     return;
   }
-  
-  if ( transfer_flags_d[tid] ) transfer_indices_d[transfer_index] = tid;
 
+  if (transfer_flags_d[tid]) {
+    transfer_indices_d[transfer_index] = tid;
+  }
 }
 
-
-__global__ void Select_Indices_to_Replace_Tranfered_Kernel( part_int_t n_total, int n_transfer, bool *transfer_flags_d, int *prefix_sum_d, int *replace_indices_d ){
-
+__global__ void Select_Indices_to_Replace_Transfered_Kernel(part_int_t n_total, int n_transfer, bool *transfer_flags_d,
+                                                            int *prefix_sum_d, int *replace_indices_d)
+{
   int tid, tid_inv;
   tid = threadIdx.x + blockIdx.x * blockDim.x;
-  if ( tid >= n_total ) return;
+  if (tid >= n_total) {
+    return;
+  }
   tid_inv = n_total - tid - 1;
 
   bool transfer_flag = transfer_flags_d[tid];
-  if ( transfer_flag ) return;
+  if (transfer_flag) {
+    return;
+  }
 
   int prefix_sum_inv, replace_id;
 
   prefix_sum_inv = n_transfer - prefix_sum_d[tid];
-  replace_id = tid_inv - prefix_sum_inv;
-  
-  
-  if ( replace_id < 0 || replace_id >= n_total ){
-    printf( "#### PARTICLE TRANSFER ERROR:  replace index outside domain: %d \n", replace_id  );
+  replace_id     = tid_inv - prefix_sum_inv;
+
+  if (replace_id < 0 || replace_id >= n_total) {
+    printf("#### PARTICLE TRANSFER ERROR:  replace index outside domain: %d \n", replace_id);
     return;
-  } 
+  }
   replace_indices_d[replace_id] = tid;
-
 }
 
-
-
-__global__ void Replace_Transfered_Particles_Kernel( int n_transfer, Real *field_d, int *transfer_indices_d, int *replace_indices_d, bool print_replace ){
-
+template <typename T>
+__global__ void Replace_Transfered_Particles_Kernel(int n_transfer, T *field_d, int *transfer_indices_d,
+                                                    int *replace_indices_d, bool print_replace)
+{
   int tid;
   tid = threadIdx.x + blockIdx.x * blockDim.x;
-  if ( tid >= n_transfer ) return;
+  if (tid >= n_transfer) {
+    return;
+  }
 
   int dst_id, src_id;
   dst_id = transfer_indices_d[tid];
   src_id = replace_indices_d[tid];
 
-  if ( dst_id < src_id ){
-    if (print_replace) printf("Replacing: %f \n", field_d[dst_id] );
+  if (dst_id < src_id) {
+    if (print_replace) {
+      printf("Replacing: %f \n", field_d[dst_id] * 1.0);
+    }
     field_d[dst_id] = field_d[src_id];
   }
-
 }
 
-
-void Replace_Transfered_Particles_GPU_function(  int n_transfer, Real *field_d, int *transfer_indices_d, int *replace_indices_d, bool print_replace ){
+void Replace_Transfered_Particles_GPU_function(int n_transfer, Real *field_d, int *transfer_indices_d,
+                                               int *replace_indices_d, bool print_replace)
+{
   int grid_size;
-  grid_size =  (n_transfer - 1) / TPB_PARTICLES + 1;
+  grid_size = (n_transfer - 1) / TPB_PARTICLES + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(grid_size, 1, 1);
   //  number of threads per 1D block
   dim3 dim1dBlock(TPB_PARTICLES, 1, 1);
 
-  hipLaunchKernelGGL( Replace_Transfered_Particles_Kernel, dim1dGrid, dim1dBlock, 0, 0,  n_transfer,  field_d, transfer_indices_d, replace_indices_d, print_replace );
-  CudaCheckError();
-
+  hipLaunchKernelGGL(Replace_Transfered_Particles_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_transfer, field_d,
+                     transfer_indices_d, replace_indices_d, print_replace);
+  GPU_Error_Check();
 }
 
+void Replace_Transfered_Particles_Int_GPU_function(int n_transfer, part_int_t *field_d, int *transfer_indices_d,
+                                                   int *replace_indices_d, bool print_replace)
+{
+  int grid_size;
+  grid_size = (n_transfer - 1) / TPB_PARTICLES + 1;
+  // number of blocks per 1D grid
+  dim3 dim1dGrid(grid_size, 1, 1);
+  //  number of threads per 1D block
+  dim3 dim1dBlock(TPB_PARTICLES, 1, 1);
+
+  hipLaunchKernelGGL(Replace_Transfered_Particles_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_transfer, field_d,
+                     transfer_indices_d, replace_indices_d, print_replace);
+  GPU_Error_Check();
+}
 
-part_int_t Select_Particles_to_Transfer_GPU_function( part_int_t n_local, int side, Real domainMin, Real domainMax, Real *pos_d, int *n_transfer_d, int *n_transfer_h, bool *transfer_flags_d, int *transfer_indices_d, int *replace_indices_d, int *transfer_prefix_sum_d, int *transfer_prefix_sum_blocks_d  ){
+part_int_t Select_Particles_to_Transfer_GPU_function(part_int_t n_local, int side, Real domainMin, Real domainMax,
+                                                     Real *pos_d, int *n_transfer_d, int *n_transfer_h,
+                                                     bool *transfer_flags_d, int *transfer_indices_d,
+                                                     int *replace_indices_d, int *transfer_prefix_sum_d,
+                                                     int *transfer_prefix_sum_blocks_d)
+{
   // set values for GPU kernels
   int grid_size, grid_size_half;
-  grid_size =  (n_local - 1) / TPB_PARTICLES + 1;
-  grid_size_half = ( (n_local-1)/2 ) / TPB_PARTICLES + 1;
+  grid_size      = (n_local - 1) / TPB_PARTICLES + 1;
+  grid_size_half = ((n_local - 1) / 2) / TPB_PARTICLES + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(grid_size, 1, 1);
   dim3 dim1dGrid_half(grid_size_half, 1, 1);
@@ -310,122 +352,211 @@ part_int_t Select_Particles_to_Transfer_GPU_function( part_int_t n_local, int si
     return 0;
   }
 
-  hipLaunchKernelGGL( Get_Transfer_Flags_Kernel, dim1dGrid, dim1dBlock, 0, 0,  n_local, side, domainMin, domainMax, pos_d, transfer_flags_d);
-  CudaCheckError();
-
-  hipLaunchKernelGGL( Scan_Kernel, dim1dGrid_half, dim1dBlock, 0, 0,  n_local, transfer_flags_d, transfer_prefix_sum_d, transfer_prefix_sum_blocks_d );
-  CudaCheckError();
-
-  hipLaunchKernelGGL( Prefix_Sum_Blocks_Kernel, 1, dim1dBlock , 0, 0,  grid_size_half, transfer_prefix_sum_blocks_d );
-  CudaCheckError();
-  
-  hipLaunchKernelGGL( Sum_Blocks_Kernel, dim1dGrid,   dim1dBlock, 0, 0,  n_local, transfer_prefix_sum_d, transfer_prefix_sum_blocks_d );
-  CudaCheckError();
-  
-  hipLaunchKernelGGL( Get_N_Transfer_Particles_Kernel, 1, 1, 0, 0,  n_local,  n_transfer_d, transfer_flags_d, transfer_prefix_sum_d );
-  CudaCheckError();
-  
-  CudaSafeCall( cudaMemcpy( n_transfer_h, n_transfer_d, sizeof(int), cudaMemcpyDeviceToHost) );
-  CudaCheckError();
-  
-  hipLaunchKernelGGL( Get_Transfer_Indices_Kernel, dim1dGrid, dim1dBlock, 0, 0,  n_local , transfer_flags_d, transfer_prefix_sum_d, transfer_indices_d );
-  CudaCheckError();
-  
-  hipLaunchKernelGGL( Select_Indices_to_Replace_Tranfered_Kernel, dim1dGrid, dim1dBlock , 0, 0,  n_local, n_transfer_h[0], transfer_flags_d, transfer_prefix_sum_d, replace_indices_d );
-  CudaCheckError();
+  hipLaunchKernelGGL(Get_Transfer_Flags_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, side, domainMin, domainMax, pos_d,
+                     transfer_flags_d);
+  GPU_Error_Check();
 
-  // if ( n_transfer_h[0] > 0 )printf( "N transfer: %d\n", n_transfer_h[0]);
-  return n_transfer_h[0];
+  hipLaunchKernelGGL(Scan_Kernel, dim1dGrid_half, dim1dBlock, 0, 0, n_local, transfer_flags_d, transfer_prefix_sum_d,
+                     transfer_prefix_sum_blocks_d);
+  GPU_Error_Check();
 
-}
+  hipLaunchKernelGGL(Prefix_Sum_Blocks_Kernel, 1, dim1dBlock, 0, 0, grid_size_half, transfer_prefix_sum_blocks_d);
+  GPU_Error_Check();
+
+  hipLaunchKernelGGL(Sum_Blocks_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, transfer_prefix_sum_d,
+                     transfer_prefix_sum_blocks_d);
+  GPU_Error_Check();
+
+  hipLaunchKernelGGL(Get_N_Transfer_Particles_Kernel, 1, 1, 0, 0, n_local, n_transfer_d, transfer_flags_d,
+                     transfer_prefix_sum_d);
+  GPU_Error_Check();
 
+  GPU_Error_Check(cudaMemcpy(n_transfer_h, n_transfer_d, sizeof(int), cudaMemcpyDeviceToHost));
+  GPU_Error_Check();
 
+  hipLaunchKernelGGL(Get_Transfer_Indices_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, transfer_flags_d,
+                     transfer_prefix_sum_d, transfer_indices_d);
+  GPU_Error_Check();
 
-__global__ void Load_Transfered_Particles_to_Buffer_Kernel( int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, int *transfer_indices_d, Real *send_buffer_d, Real domainMin, Real domainMax, int boundary_type  ){
+  hipLaunchKernelGGL(Select_Indices_to_Replace_Transfered_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, n_transfer_h[0],
+                     transfer_flags_d, transfer_prefix_sum_d, replace_indices_d);
+  GPU_Error_Check();
 
+  // if ( n_transfer_h[0] > 0 )printf( "N transfer: %d\n", n_transfer_h[0]);
+  return n_transfer_h[0];
+}
+
+__global__ void Load_Transfered_Particles_to_Buffer_Kernel(int n_transfer, int field_id, int n_fields_to_transfer,
+                                                           Real *field_d, int *transfer_indices_d, Real *send_buffer_d,
+                                                           Real domainMin, Real domainMax, int boundary_type)
+{
   int tid;
   tid = threadIdx.x + blockIdx.x * blockDim.x;
-  if ( tid >= n_transfer ) return;
+  if (tid >= n_transfer) {
+    return;
+  }
 
   int src_id, dst_id;
   Real field_val;
-  src_id = transfer_indices_d[tid];
-  dst_id = tid * n_fields_to_transfer + field_id;
+  src_id    = transfer_indices_d[tid];
+  dst_id    = tid * n_fields_to_transfer + field_id;
   field_val = field_d[src_id];
 
   // Set global periodic boundary conditions
-  if ( boundary_type == 1 && field_val < domainMin )  field_val += ( domainMax - domainMin );
-  if ( boundary_type == 1 && field_val >= domainMax ) field_val -= ( domainMax - domainMin );
+  if (boundary_type == 1 && field_val < domainMin) {
+    field_val += (domainMax - domainMin);
+  }
+  if (boundary_type == 1 && field_val >= domainMax) {
+    field_val -= (domainMax - domainMin);
+  }
   send_buffer_d[dst_id] = field_val;
-
 }
 
-void Load_Particles_to_Transfer_GPU_function(  int n_transfer, int field_id, int n_fields_to_transfer,  Real *field_d, int *transfer_indices_d, Real *send_buffer_d, Real domainMin, Real domainMax, int boundary_type ){
-
+void Load_Particles_to_Transfer_GPU_function(int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d,
+                                             int *transfer_indices_d, Real *send_buffer_d, Real domainMin,
+                                             Real domainMax, int boundary_type)
+{
   // set values for GPU kernels
   int grid_size;
-  grid_size =  (n_transfer - 1) / TPB_PARTICLES + 1;
+  grid_size = (n_transfer - 1) / TPB_PARTICLES + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(grid_size, 1, 1);
   //  number of threads per 1D block
   dim3 dim1dBlock(TPB_PARTICLES, 1, 1);
 
-  hipLaunchKernelGGL( Load_Transfered_Particles_to_Buffer_Kernel, dim1dGrid, dim1dBlock , 0, 0,  n_transfer, field_id, n_fields_to_transfer, field_d, transfer_indices_d, send_buffer_d, domainMin, domainMax, boundary_type );
-  CudaCheckError();
-
+  hipLaunchKernelGGL(Load_Transfered_Particles_to_Buffer_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_transfer, field_id,
+                     n_fields_to_transfer, field_d, transfer_indices_d, send_buffer_d, domainMin, domainMax,
+                     boundary_type);
+  GPU_Error_Check();
 }
 
+__global__ void Load_Transfered_Particles_Ints_to_Buffer_Kernel(int n_transfer, int field_id, int n_fields_to_transfer,
+                                                                part_int_t *field_d, int *transfer_indices_d,
+                                                                Real *send_buffer_d, Real domainMin, Real domainMax,
+                                                                int boundary_type)
+{
+  int tid;
+  tid = threadIdx.x + blockIdx.x * blockDim.x;
+  if (tid >= n_transfer) {
+    return;
+  }
 
-void Copy_Particles_GPU_Buffer_to_Host_Buffer( int n_transfer, Real *buffer_h, Real *buffer_d ){
-
-  int transfer_size;
-  transfer_size = n_transfer * N_DATA_PER_PARTICLE_TRANSFER;
-  CudaSafeCall( cudaMemcpy( buffer_h, buffer_d, transfer_size*sizeof(Real), cudaMemcpyDeviceToHost) );
-  CudaCheckError();
+  int src_id, dst_id;
+  part_int_t field_val;
+  src_id    = transfer_indices_d[tid];
+  dst_id    = tid * n_fields_to_transfer + field_id;
+  field_val = field_d[src_id];
 
+  // Set global periodic boundary conditions
+  if (boundary_type == 1 && field_val < domainMin) {
+    field_val += (domainMax - domainMin);
+  }
+  if (boundary_type == 1 && field_val >= domainMax) {
+    field_val -= (domainMax - domainMin);
+  }
+  send_buffer_d[dst_id] = __longlong_as_double(field_val);
 }
 
+void Load_Particles_to_Transfer_Int_GPU_function(int n_transfer, int field_id, int n_fields_to_transfer,
+                                                 part_int_t *field_d, int *transfer_indices_d, Real *send_buffer_d,
+                                                 Real domainMin, Real domainMax, int boundary_type)
+{
+  // set values for GPU kernels
+  int grid_size;
+  grid_size = (n_transfer - 1) / TPB_PARTICLES + 1;
+  // number of blocks per 1D grid
+  dim3 dim1dGrid(grid_size, 1, 1);
+  //  number of threads per 1D block
+  dim3 dim1dBlock(TPB_PARTICLES, 1, 1);
 
+  hipLaunchKernelGGL(Load_Transfered_Particles_Ints_to_Buffer_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_transfer, field_id,
+                     n_fields_to_transfer, field_d, transfer_indices_d, send_buffer_d, domainMin, domainMax,
+                     boundary_type);
+  GPU_Error_Check();
+}
 
-void Copy_Particles_Host_Buffer_to_GPU_Buffer( int n_transfer, Real *buffer_h, Real *buffer_d ){
-
+  #ifdef MPI_CHOLLA
+void Copy_Particles_GPU_Buffer_to_Host_Buffer(int n_transfer, Real *buffer_h, Real *buffer_d)
+{
   int transfer_size;
   transfer_size = n_transfer * N_DATA_PER_PARTICLE_TRANSFER;
-  CudaSafeCall( cudaMemcpy( buffer_d, buffer_h, transfer_size*sizeof(Real), cudaMemcpyHostToDevice) );
-  CudaCheckError();
-
+  GPU_Error_Check(cudaMemcpy(buffer_h, buffer_d, transfer_size * sizeof(Real), cudaMemcpyDeviceToHost));
+  GPU_Error_Check();
 }
 
+void Copy_Particles_Host_Buffer_to_GPU_Buffer(int n_transfer, Real *buffer_h, Real *buffer_d)
+{
+  int transfer_size;
+  transfer_size = n_transfer * N_DATA_PER_PARTICLE_TRANSFER;
+  GPU_Error_Check(cudaMemcpy(buffer_d, buffer_h, transfer_size * sizeof(Real), cudaMemcpyHostToDevice));
+  GPU_Error_Check();
+}
+  #endif  // MPI_CHOLLA
 
-__global__ void Unload_Transfered_Particles_from_Buffer_Kernel( int n_local, int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d,  Real *recv_buffer_d  ){
-
+__global__ void Unload_Transfered_Particles_from_Buffer_Kernel(int n_local, int n_transfer, int field_id,
+                                                               int n_fields_to_transfer, Real *field_d,
+                                                               Real *recv_buffer_d)
+{
   int tid;
   tid = threadIdx.x + blockIdx.x * blockDim.x;
-  if ( tid >= n_transfer ) return;
+  if (tid >= n_transfer) {
+    return;
+  }
 
   int src_id, dst_id;
-  src_id = tid * n_fields_to_transfer + field_id;
-  dst_id = n_local + tid;
+  src_id          = tid * n_fields_to_transfer + field_id;
+  dst_id          = n_local + tid;
   field_d[dst_id] = recv_buffer_d[src_id];
+}
 
+void Unload_Particles_to_Transfer_GPU_function(int n_local, int n_transfer, int field_id, int n_fields_to_transfer,
+                                               Real *field_d, Real *recv_buffer_d)
+{
+  // set values for GPU kernels
+  int grid_size;
+  grid_size = (n_transfer - 1) / TPB_PARTICLES + 1;
+  // number of blocks per 1D grid
+  dim3 dim1dGrid(grid_size, 1, 1);
+  //  number of threads per 1D block
+  dim3 dim1dBlock(TPB_PARTICLES, 1, 1);
+
+  hipLaunchKernelGGL(Unload_Transfered_Particles_from_Buffer_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, n_transfer,
+                     field_id, n_fields_to_transfer, field_d, recv_buffer_d);
+  GPU_Error_Check();
 }
 
-void Unload_Particles_to_Transfer_GPU_function( int n_local, int n_transfer, int field_id, int n_fields_to_transfer,  Real *field_d,  Real *recv_buffer_d  ){
+__global__ void Unload_Transfered_Particles_Int_from_Buffer_Kernel(int n_local, int n_transfer, int field_id,
+                                                                   int n_fields_to_transfer, part_int_t *field_d,
+                                                                   Real *recv_buffer_d)
+{
+  int tid;
+  tid = threadIdx.x + blockIdx.x * blockDim.x;
+  if (tid >= n_transfer) {
+    return;
+  }
 
+  int src_id, dst_id;
+  src_id          = tid * n_fields_to_transfer + field_id;
+  dst_id          = n_local + tid;
+  field_d[dst_id] = __double_as_longlong(recv_buffer_d[src_id]);
+}
+
+void Unload_Particles_Int_to_Transfer_GPU_function(int n_local, int n_transfer, int field_id, int n_fields_to_transfer,
+                                                   part_int_t *field_d, Real *recv_buffer_d)
+{
   // set values for GPU kernels
   int grid_size;
-  grid_size =  (n_transfer - 1) / TPB_PARTICLES + 1;
+  grid_size = (n_transfer - 1) / TPB_PARTICLES + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(grid_size, 1, 1);
   //  number of threads per 1D block
   dim3 dim1dBlock(TPB_PARTICLES, 1, 1);
 
-  hipLaunchKernelGGL( Unload_Transfered_Particles_from_Buffer_Kernel, dim1dGrid, dim1dBlock , 0, 0, n_local, n_transfer, field_id, n_fields_to_transfer, field_d, recv_buffer_d );
-  CudaCheckError();
-
+  hipLaunchKernelGGL(Unload_Transfered_Particles_Int_from_Buffer_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local,
+                     n_transfer, field_id, n_fields_to_transfer, field_d, recv_buffer_d);
+  GPU_Error_Check();
 }
 
 // #endif//MPI_CHOLLA
 
-
-#endif //PARTICLES
+#endif  // PARTICLES
diff --git a/src/particles/particles_boundaries_gpu.h b/src/particles/particles_boundaries_gpu.h
index d10fb3428..638102ad2 100644
--- a/src/particles/particles_boundaries_gpu.h
+++ b/src/particles/particles_boundaries_gpu.h
@@ -1,21 +1,34 @@
 #if defined(PARTICLES) && defined(PARTICLES_GPU)
 
-#ifndef PARTICLES_BOUNDARIES_H
-#define PARTICLES_BOUNDARIES_H
-
-part_int_t Select_Particles_to_Transfer_GPU_function( part_int_t n_local, int side, Real domainMin, Real domainMax, Real *pos_d, int *n_transfer_d, int *n_transfer_h, bool *transfer_flags_d, int *transfer_indices_d, int *replace_indices_d, int *transfer_prefix_sum_d, int *transfer_prefix_sum_blocks_d  );
-
-void Load_Particles_to_Transfer_GPU_function(  int n_transfer, int field_id, int n_fields_to_transfer,  Real *field_d, int *transfer_indices_d, Real *send_buffer_d, Real domainMin, Real domainMax, int boundary_type );
-
-void Replace_Transfered_Particles_GPU_function(  int n_transfer, Real *field_d, int *transfer_indices_d, int *replace_indices_d, bool print_replace );
-
-void Copy_Particles_GPU_Buffer_to_Host_Buffer( int n_transfer, Real *buffer_h, Real *buffer_d );
-
-void Copy_Particles_Host_Buffer_to_GPU_Buffer( int n_transfer, Real *buffer_h, Real *buffer_d );
-
-void Unload_Particles_to_Transfer_GPU_function( int n_local, int n_transfer, int field_id, int n_fields_to_transfer,  Real *field_d,  Real *recv_buffer_d );
-
-
-
-#endif //PARTICLES_H
-#endif //PARTICLES
\ No newline at end of file
+  #ifndef PARTICLES_BOUNDARIES_H
+    #define PARTICLES_BOUNDARIES_H
+
+part_int_t Select_Particles_to_Transfer_GPU_function(part_int_t n_local, int side, Real domainMin, Real domainMax,
+                                                     Real *pos_d, int *n_transfer_d, int *n_transfer_h,
+                                                     bool *transfer_flags_d, int *transfer_indices_d,
+                                                     int *replace_indices_d, int *transfer_prefix_sum_d,
+                                                     int *transfer_prefix_sum_blocks_d);
+
+void Load_Particles_to_Transfer_GPU_function(int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d,
+                                             int *transfer_indices_d, Real *send_buffer_d, Real domainMin,
+                                             Real domainMax, int boundary_type);
+void Load_Particles_to_Transfer_Int_GPU_function(int n_transfer, int field_id, int n_fields_to_transfer,
+                                                 part_int_t *field_d, int *transfer_indices_d, Real *send_buffer_d,
+                                                 Real domainMin, Real domainMax, int boundary_type);
+
+void Replace_Transfered_Particles_GPU_function(int n_transfer, Real *field_d, int *transfer_indices_d,
+                                               int *replace_indices_d, bool print_replace);
+void Replace_Transfered_Particles_Int_GPU_function(int n_transfer, part_int_t *field_d, int *transfer_indices_d,
+                                                   int *replace_indices_d, bool print_replace);
+
+void Copy_Particles_GPU_Buffer_to_Host_Buffer(int n_transfer, Real *buffer_h, Real *buffer_d);
+
+void Copy_Particles_Host_Buffer_to_GPU_Buffer(int n_transfer, Real *buffer_h, Real *buffer_d);
+
+void Unload_Particles_to_Transfer_GPU_function(int n_local, int n_transfer, int field_id, int n_fields_to_transfer,
+                                               Real *field_d, Real *recv_buffer_d);
+void Unload_Particles_Int_to_Transfer_GPU_function(int n_local, int n_transfer, int field_id, int n_fields_to_transfer,
+                                                   part_int_t *field_d, Real *recv_buffer_d);
+
+  #endif  // PARTICLES_H
+#endif    // PARTICLES
\ No newline at end of file
diff --git a/src/particles/particles_dynamics.cpp b/src/particles/particles_dynamics.cpp
index de00b1426..39aeba6c7 100644
--- a/src/particles/particles_dynamics.cpp
+++ b/src/particles/particles_dynamics.cpp
@@ -1,58 +1,55 @@
 #ifdef PARTICLES
 
+  #include <stdio.h>
+  #include <stdlib.h>
 
-#include <stdio.h>
-#include <stdlib.h>
-#include "math.h"
-#include <iostream>
-#include "../global/global.h"
-#include "../grid/grid3D.h"
-#include "../particles/particles_3D.h"
-#include "../io/io.h"
+  #include <iostream>
 
-#ifdef PARALLEL_OMP
-#include "../utils/parallel_omp.h"
-#endif
+  #include "../global/global.h"
+  #include "../grid/grid3D.h"
+  #include "../io/io.h"
+  #include "math.h"
+  #include "particles_3D.h"
 
+  #ifdef PARALLEL_OMP
+    #include "../utils/parallel_omp.h"
+  #endif
 
-//Compute the delta_t for the particles
-Real Grid3D::Calc_Particles_dt( ){
-
+// Compute the delta_t for the particles
+Real Grid3D::Calc_Particles_dt()
+{
   Real dt_particles;
 
   #ifdef PARTICLES_CPU
 
-  #ifndef PARALLEL_OMP
-  dt_particles = Calc_Particles_dt_function( 0, Particles.n_local );
-  #else
+    #ifndef PARALLEL_OMP
+  dt_particles = Calc_Particles_dt_function(0, Particles.n_local);
+    #else
   dt_particles = 1e100;
   Real dt_particles_all[N_OMP_THREADS];
-  #pragma omp parallel num_threads( N_OMP_THREADS )
+      #pragma omp parallel num_threads(N_OMP_THREADS)
   {
     int omp_id, n_omp_procs;
     part_int_t p_start, p_end;
-    omp_id = omp_get_thread_num();
+    omp_id      = omp_get_thread_num();
     n_omp_procs = omp_get_num_threads();
-    Get_OMP_Particles_Indxs( Particles.n_local, N_OMP_THREADS, omp_id,  &p_start, &p_end );
-    dt_particles_all[omp_id] = Calc_Particles_dt_function( p_start, p_end );
+    Get_OMP_Particles_Indxs(Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end);
+    dt_particles_all[omp_id] = Calc_Particles_dt_function(p_start, p_end);
   }
 
-  for ( int i=0; i<N_OMP_THREADS; i++ ){
-    dt_particles = fmin( dt_particles, dt_particles_all[i]);
+  for (int i = 0; i < N_OMP_THREADS; i++) {
+    dt_particles = fmin(dt_particles, dt_particles_all[i]);
   }
-  #endif //PARALLEL_OMP
-  #endif //PARTICLES_CPU
-
+    #endif  // PARALLEL_OMP
+  #endif    // PARTICLES_CPU
 
   #ifdef PARTICLES_GPU
   dt_particles = Calc_Particles_dt_GPU();
-  #endif//PARTICLES_GPU
-
-
+  #endif  // PARTICLES_GPU
 
   Real dt_particles_global;
   #ifdef MPI_CHOLLA
-  dt_particles_global = ReduceRealMin( dt_particles );
+  dt_particles_global = ReduceRealMin(dt_particles);
   #else
   dt_particles_global = dt_particles;
   #endif
@@ -60,295 +57,307 @@ Real Grid3D::Calc_Particles_dt( ){
   return dt_particles_global;
 }
 
+  #ifdef PARTICLES_GPU
 
-#ifdef PARTICLES_GPU
-
-//Go over all the particles and find dt_min in the GPU
-Real Grid3D::Calc_Particles_dt_GPU(){
-
+// Go over all the particles and find dt_min in the GPU
+Real Grid3D::Calc_Particles_dt_GPU()
+{
   // set values for GPU kernels
-  int ngrid =  (Particles.n_local + TPB_PARTICLES - 1) / TPB_PARTICLES;
-
-
-  if ( ngrid > Particles.G.size_blocks_array ) chprintf(" Error: particles dt_array too small\n");
+  int ngrid = (Particles.n_local - 1) / TPB_PARTICLES + 1;
 
+  if (ngrid > Particles.G.size_blocks_array) {
+    chprintf(" Error: particles dt_array too small\n");
+  }
 
   Real max_dti;
-  max_dti = Particles.Calc_Particles_dt_GPU_function( ngrid, Particles.n_local, Particles.G.dx, Particles.G.dy, Particles.G.dz, Particles.vel_x_dev, Particles.vel_y_dev, Particles.vel_z_dev, Particles.G.dti_array_host, Particles.G.dti_array_dev );
+  max_dti = Particles.Calc_Particles_dt_GPU_function(
+      ngrid, Particles.n_local, Particles.G.dx, Particles.G.dy, Particles.G.dz, Particles.vel_x_dev,
+      Particles.vel_y_dev, Particles.vel_z_dev, Particles.G.dti_array_host, Particles.G.dti_array_dev);
 
   Real dt_min;
 
-  #ifdef COSMOLOGY
+    #ifdef COSMOLOGY
   Real scale_factor, vel_factor, da_min;
-  scale_factor = 1 / ( Cosmo.current_a * Cosmo.Get_Hubble_Parameter( Cosmo.current_a) ) * Cosmo.cosmo_h;
-  vel_factor = Cosmo.current_a / scale_factor;
-  da_min = vel_factor / max_dti;
-  dt_min = Cosmo.Get_dt_from_da( da_min );
-  #else
+  scale_factor = 1 / (Cosmo.current_a * Cosmo.Get_Hubble_Parameter(Cosmo.current_a)) * Cosmo.cosmo_h;
+  vel_factor   = Cosmo.current_a / scale_factor;
+  da_min       = vel_factor / max_dti;
+  dt_min       = Cosmo.Get_dt_from_da(da_min);
+    #else
   dt_min = 1 / max_dti;
-  #endif
-
-  return Particles.C_cfl*dt_min;
+    #endif
 
+  return Particles.C_cfl * dt_min;
 }
 
-//Update positions and velocities (step 1 of KDK scheme ) in the GPU
-void Grid3D::Advance_Particles_KDK_Step1_GPU(){
-
-  #ifdef COSMOLOGY
-  Particles.Advance_Particles_KDK_Step1_Cosmo_GPU_function( Particles.n_local, Cosmo.delta_a, Particles.pos_x_dev, Particles.pos_y_dev, Particles.pos_z_dev, Particles.vel_x_dev, Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev, Particles.grav_y_dev, Particles.grav_z_dev, Cosmo.current_a, Cosmo.H0, Cosmo.cosmo_h, Cosmo.Omega_M, Cosmo.Omega_L, Cosmo.Omega_K );
-  #else
-  Particles.Advance_Particles_KDK_Step1_GPU_function( Particles.n_local, Particles.dt, Particles.pos_x_dev, Particles.pos_y_dev, Particles.pos_z_dev, Particles.vel_x_dev, Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev, Particles.grav_y_dev, Particles.grav_z_dev );
-  #endif
-
-
+// Update positions and velocities (step 1 of KDK scheme ) in the GPU
+void Grid3D::Advance_Particles_KDK_Step1_GPU()
+{
+    #ifdef COSMOLOGY
+  Particles.Advance_Particles_KDK_Step1_Cosmo_GPU_function(
+      Particles.n_local, Cosmo.delta_a, Particles.pos_x_dev, Particles.pos_y_dev, Particles.pos_z_dev,
+      Particles.vel_x_dev, Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev, Particles.grav_y_dev,
+      Particles.grav_z_dev, Cosmo.current_a, Cosmo.H0, Cosmo.cosmo_h, Cosmo.Omega_M, Cosmo.Omega_L, Cosmo.Omega_K);
+    #else
+  Particles.Advance_Particles_KDK_Step1_GPU_function(Particles.n_local, Particles.dt, Particles.pos_x_dev,
+                                                     Particles.pos_y_dev, Particles.pos_z_dev, Particles.vel_x_dev,
+                                                     Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev,
+                                                     Particles.grav_y_dev, Particles.grav_z_dev);
+    #endif
 }
 
-//Update velocities (step 2 of KDK scheme ) in the GPU
-void Grid3D::Advance_Particles_KDK_Step2_GPU(){
-
-  #ifdef COSMOLOGY
-  Particles.Advance_Particles_KDK_Step2_Cosmo_GPU_function( Particles.n_local, Cosmo.delta_a, Particles.vel_x_dev, Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev, Particles.grav_y_dev, Particles.grav_z_dev, Cosmo.current_a, Cosmo.H0, Cosmo.cosmo_h, Cosmo.Omega_M, Cosmo.Omega_L, Cosmo.Omega_K );
-  #else
-  Particles.Advance_Particles_KDK_Step2_GPU_function( Particles.n_local, Particles.dt, Particles.vel_x_dev, Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev, Particles.grav_y_dev, Particles.grav_z_dev );
-  #endif
-
-
+// Update velocities (step 2 of KDK scheme ) in the GPU
+void Grid3D::Advance_Particles_KDK_Step2_GPU()
+{
+    #ifdef COSMOLOGY
+  Particles.Advance_Particles_KDK_Step2_Cosmo_GPU_function(
+      Particles.n_local, Cosmo.delta_a, Particles.vel_x_dev, Particles.vel_y_dev, Particles.vel_z_dev,
+      Particles.grav_x_dev, Particles.grav_y_dev, Particles.grav_z_dev, Cosmo.current_a, Cosmo.H0, Cosmo.cosmo_h,
+      Cosmo.Omega_M, Cosmo.Omega_L, Cosmo.Omega_K);
+    #else
+  Particles.Advance_Particles_KDK_Step2_GPU_function(Particles.n_local, Particles.dt, Particles.vel_x_dev,
+                                                     Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev,
+                                                     Particles.grav_y_dev, Particles.grav_z_dev);
+    #endif
 }
 
+  #endif  // PARTICLES_GPU
 
-#endif //PARTICLES_GPU
-
-
-
-
-#ifdef PARTICLES_CPU
+  #ifdef PARTICLES_CPU
 
-//Loop over the particles anf compute dt_min
-Real Grid3D::Calc_Particles_dt_function( part_int_t p_start, part_int_t p_end ){
+// Loop over the particles anf compute dt_min
+Real Grid3D::Calc_Particles_dt_function(part_int_t p_start, part_int_t p_end)
+{
   part_int_t pID;
   Real dt, dt_min, vel;
   dt_min = 1e100;
 
-  for ( pID=p_start; pID<p_end; pID++ ){
+  for (pID = p_start; pID < p_end; pID++) {
     vel = fabs(Particles.vel_x[pID]);
-    if ( vel > 0){
-      dt = Particles.G.dx / vel;
-      dt_min = std::min( dt_min, dt);
+    if (vel > 0) {
+      dt     = Particles.G.dx / vel;
+      dt_min = std::min(dt_min, dt);
     }
     vel = fabs(Particles.vel_y[pID]);
-    if ( vel > 0){
-      dt = Particles.G.dy / vel;
-      dt_min = std::min( dt_min, dt);
+    if (vel > 0) {
+      dt     = Particles.G.dy / vel;
+      dt_min = std::min(dt_min, dt);
     }
     vel = fabs(Particles.vel_z[pID]);
-    if ( vel > 0){
-      dt = Particles.G.dz / vel;
-      dt_min = std::min( dt_min, dt);
+    if (vel > 0) {
+      dt     = Particles.G.dz / vel;
+      dt_min = std::min(dt_min, dt);
     }
   }
   return Particles.C_cfl * dt_min;
 }
-#endif //PARTICLES_CPU
-
-//Update the particles positions and velocities
-void Grid3D::Advance_Particles( int N_step ){
+  #endif  // PARTICLES_CPU
 
+// Update the particles positions and velocities
+void Grid3D::Advance_Particles(int N_step)
+{
+  GPU_Error_Check();
   #ifdef CPU_TIME
-  if ( N_step == 1) Timer.Advance_Part_1.Start();
-  if ( N_step == 2) Timer.Advance_Part_2.Start();
+  if (N_step == 1) {
+    Timer.Advance_Part_1.Start();
+  }
+  if (N_step == 2) {
+    Timer.Advance_Part_2.Start();
+  }
   #endif
 
   #ifdef PARTICLES_KDK
-  //Update the velocities by 0.5*delta_t and update the positions by delta_t
-  if ( N_step == 1 ) Advance_Particles_KDK_Step1();
+  // Update the velocities by 0.5*delta_t and update the positions by delta_t
+  if (N_step == 1) {
+    Advance_Particles_KDK_Step1();
+  }
   #endif
 
-  if ( N_step == 2 ){
-    //Compute the particles accelerations at the new positions
+  if (N_step == 2) {
+    // Compute the particles accelerations at the new positions
     Get_Particles_Acceleration();
 
-    #ifdef PARTICLES_KDK
-    //Advance the particles velocities by the remaining 0.5*delta_t
+  #ifdef PARTICLES_KDK
+    // Advance the particles velocities by the remaining 0.5*delta_t
     Advance_Particles_KDK_Step2();
-    #endif
-
+  #endif
   }
 
   #ifdef CPU_TIME
-  if ( N_step == 1) Timer.Advance_Part_1.End();
-  if ( N_step == 2) Timer.Advance_Part_2.End();
+  if (N_step == 1) {
+    Timer.Advance_Part_1.End();
+  }
+  if (N_step == 2) {
+    Timer.Advance_Part_2.End();
+  }
   #endif
-
+  GPU_Error_Check();
 }
 
 // Get the accteleration for all the particles
-void Grid3D::Get_Particles_Acceleration(){
-
-  //First compute the gravitational field at the center of the grid cells
+void Grid3D::Get_Particles_Acceleration()
+{
+  // First compute the gravitational field at the center of the grid cells
   Get_Gravity_Field_Particles();
 
-  //Then Interpolate the gravitational field from the centers of the cells to the positions of the particles
+  // Then Interpolate the gravitational field from the centers of the cells to
+  // the positions of the particles
   Get_Gravity_CIC();
 }
 
-//Update positions and velocities (step 1 of KDK scheme )
-void Grid3D::Advance_Particles_KDK_Step1( ){
-
+// Update positions and velocities (step 1 of KDK scheme )
+void Grid3D::Advance_Particles_KDK_Step1()
+{
   #ifdef PARTICLES_CPU
 
-  #ifndef PARALLEL_OMP
-  #ifdef COSMOLOGY
-  Advance_Particles_KDK_Cosmo_Step1_function( 0, Particles.n_local );
-  #else
-  Advance_Particles_KDK_Step1_function( 0, Particles.n_local );
-  #endif//COSMOLOGY
-  #else
-  #pragma omp parallel num_threads( N_OMP_THREADS )
+    #ifndef PARALLEL_OMP
+      #ifdef COSMOLOGY
+  Advance_Particles_KDK_Cosmo_Step1_function(0, Particles.n_local);
+      #else
+  Advance_Particles_KDK_Step1_function(0, Particles.n_local);
+      #endif  // COSMOLOGY
+    #else
+      #pragma omp parallel num_threads(N_OMP_THREADS)
   {
     int omp_id, n_omp_procs;
     part_int_t p_start, p_end;
-    omp_id = omp_get_thread_num();
+    omp_id      = omp_get_thread_num();
     n_omp_procs = omp_get_num_threads();
-    Get_OMP_Particles_Indxs( Particles.n_local, N_OMP_THREADS, omp_id,  &p_start, &p_end );
-    #ifdef COSMOLOGY
-    Advance_Particles_KDK_Cosmo_Step1_function( p_start, p_end );
-    #else
-    Advance_Particles_KDK_Step1_function( p_start, p_end );
-    #endif//COSMOLOGY
+    Get_OMP_Particles_Indxs(Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end);
+      #ifdef COSMOLOGY
+    Advance_Particles_KDK_Cosmo_Step1_function(p_start, p_end);
+      #else
+    Advance_Particles_KDK_Step1_function(p_start, p_end);
+      #endif  // COSMOLOGY
   }
-  #endif //PARALLEL_OMP
-  #endif //PARTICLES_CPU
+    #endif    // PARALLEL_OMP
+  #endif      // PARTICLES_CPU
 
   #ifdef PARTICLES_GPU
   Advance_Particles_KDK_Step1_GPU();
-  #endif //PARTICLES_GPU
-
+  #endif  // PARTICLES_GPU
 }
 
-//Update velocities (step 2 of KDK scheme )
-void Grid3D::Advance_Particles_KDK_Step2( ){
-
+// Update velocities (step 2 of KDK scheme )
+void Grid3D::Advance_Particles_KDK_Step2()
+{
   #ifdef PARTICLES_CPU
 
-  #ifndef PARALLEL_OMP
-  #ifdef COSMOLOGY
-  Advance_Particles_KDK_Cosmo_Step2_function( 0, Particles.n_local );
-  #else
-  Advance_Particles_KDK_Step2_function( 0, Particles.n_local );
-  #endif//COSMOLOGY
-  #else
-  #pragma omp parallel num_threads( N_OMP_THREADS )
+    #ifndef PARALLEL_OMP
+      #ifdef COSMOLOGY
+  Advance_Particles_KDK_Cosmo_Step2_function(0, Particles.n_local);
+      #else
+  Advance_Particles_KDK_Step2_function(0, Particles.n_local);
+      #endif  // COSMOLOGY
+    #else
+      #pragma omp parallel num_threads(N_OMP_THREADS)
   {
     int omp_id, n_omp_procs;
     part_int_t p_start, p_end;
-    omp_id = omp_get_thread_num();
+    omp_id      = omp_get_thread_num();
     n_omp_procs = omp_get_num_threads();
-    Get_OMP_Particles_Indxs( Particles.n_local, N_OMP_THREADS, omp_id,  &p_start, &p_end );
-    #ifdef COSMOLOGY
-    Advance_Particles_KDK_Cosmo_Step2_function( p_start, p_end );
-    #else
-    Advance_Particles_KDK_Step2_function( p_start, p_end );
-    #endif//COSMOLOGY
+    Get_OMP_Particles_Indxs(Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end);
+      #ifdef COSMOLOGY
+    Advance_Particles_KDK_Cosmo_Step2_function(p_start, p_end);
+      #else
+    Advance_Particles_KDK_Step2_function(p_start, p_end);
+      #endif  // COSMOLOGY
   }
-  #endif //PARALLEL_OMP
-  #endif //PARTICLES_CPU
+    #endif    // PARALLEL_OMP
+  #endif      // PARTICLES_CPU
 
   #ifdef PARTICLES_GPU
   Advance_Particles_KDK_Step2_GPU();
-  #endif //PARTICLES_GPU
-
+  #endif  // PARTICLES_GPU
 }
 
-#ifdef PARTICLES_CPU
-//Update positions and velocities (step 1 of KDK scheme )
-void Grid3D::Advance_Particles_KDK_Step1_function( part_int_t p_start, part_int_t p_end ){
-
+  #ifdef PARTICLES_CPU
+// Update positions and velocities (step 1 of KDK scheme )
+void Grid3D::Advance_Particles_KDK_Step1_function(part_int_t p_start, part_int_t p_end)
+{
   part_int_t pID;
   Real dt = Particles.dt;
   // Advance velocities by half a step
-  for ( pID=p_start; pID<p_end; pID++ ){
+  for (pID = p_start; pID < p_end; pID++) {
     Particles.vel_x[pID] += 0.5 * dt * Particles.grav_x[pID];
     Particles.vel_y[pID] += 0.5 * dt * Particles.grav_y[pID];
     Particles.vel_z[pID] += 0.5 * dt * Particles.grav_z[pID];
   }
 
-  //Advance Positions by delta_t using the updated velocities
-  for ( pID=p_start; pID<p_end; pID++ ){
+  // Advance Positions by delta_t using the updated velocities
+  for (pID = p_start; pID < p_end; pID++) {
     Particles.pos_x[pID] += dt * Particles.vel_x[pID];
     Particles.pos_y[pID] += dt * Particles.vel_y[pID];
     Particles.pos_z[pID] += dt * Particles.vel_z[pID];
   }
 }
 
-//Update  velocities (step 2 of KDK scheme )
-void Grid3D::Advance_Particles_KDK_Step2_function( part_int_t p_start, part_int_t p_end ){
-
+// Update  velocities (step 2 of KDK scheme )
+void Grid3D::Advance_Particles_KDK_Step2_function(part_int_t p_start, part_int_t p_end)
+{
   part_int_t pID;
   Real dt = Particles.dt;
   // Advance velocities by the second half a step
-  for ( pID=p_start; pID<p_end; pID++ ){
+  for (pID = p_start; pID < p_end; pID++) {
     Particles.vel_x[pID] += 0.5 * dt * Particles.grav_x[pID];
     Particles.vel_y[pID] += 0.5 * dt * Particles.grav_y[pID];
     Particles.vel_z[pID] += 0.5 * dt * Particles.grav_z[pID];
   }
 }
-#endif //PARTICLES_CPU
+  #endif  // PARTICLES_CPU
 
-#ifdef COSMOLOGY
-
-//Compute the delta_t for the particles  COSMOLOGICAL SIMULATION
-Real Grid3D::Calc_Particles_dt_Cosmo(){
+  #ifdef COSMOLOGY
 
+// Compute the delta_t for the particles  COSMOLOGICAL SIMULATION
+Real Grid3D::Calc_Particles_dt_Cosmo()
+{
   Real dt_particles;
 
-  #ifdef PARTICLES_CPU
+    #ifdef PARTICLES_CPU
 
-  #ifndef PARALLEL_OMP
-  dt_particles = Calc_Particles_dt_Cosmo_function( 0, Particles.n_local );
-  #else
+      #ifndef PARALLEL_OMP
+  dt_particles = Calc_Particles_dt_Cosmo_function(0, Particles.n_local);
+      #else
   dt_particles = 1e100;
   Real dt_particles_all[N_OMP_THREADS];
-  #pragma omp parallel num_threads( N_OMP_THREADS )
+        #pragma omp parallel num_threads(N_OMP_THREADS)
   {
     int omp_id, n_omp_procs;
     part_int_t p_start, p_end;
-    omp_id = omp_get_thread_num();
+    omp_id      = omp_get_thread_num();
     n_omp_procs = omp_get_num_threads();
-    Get_OMP_Particles_Indxs( Particles.n_local, N_OMP_THREADS, omp_id,  &p_start, &p_end );
-    dt_particles_all[omp_id] = Calc_Particles_dt_Cosmo_function( p_start, p_end );
+    Get_OMP_Particles_Indxs(Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end);
+    dt_particles_all[omp_id] = Calc_Particles_dt_Cosmo_function(p_start, p_end);
   }
 
-  for ( int i=0; i<N_OMP_THREADS; i++ ){
-    dt_particles = fmin( dt_particles, dt_particles_all[i]);
+  for (int i = 0; i < N_OMP_THREADS; i++) {
+    dt_particles = fmin(dt_particles, dt_particles_all[i]);
   }
-  #endif //PARALLEL_OMP
-  #endif //PARTICLES_CPU
+      #endif  // PARALLEL_OMP
+    #endif    // PARTICLES_CPU
 
-  #ifdef PARTICLES_GPU
+    #ifdef PARTICLES_GPU
   dt_particles = Calc_Particles_dt_GPU();
-  #endif//PARTICLES_GPU
+    #endif  // PARTICLES_GPU
 
   Real dt_particles_global;
-  #ifdef MPI_CHOLLA
-  dt_particles_global = ReduceRealMin( dt_particles );
-  #else
+    #ifdef MPI_CHOLLA
+  dt_particles_global = ReduceRealMin(dt_particles);
+    #else
   dt_particles_global = dt_particles;
-  #endif
+    #endif
 
   return dt_particles_global;
 }
 
-
-#ifdef PARTICLES_CPU
-//Loop over the particles anf compute dt_min for a cosmological simulation
-Real Grid3D::Calc_Particles_dt_Cosmo_function( part_int_t p_start, part_int_t p_end ){
-
+    #ifdef PARTICLES_CPU
+// Loop over the particles anf compute dt_min for a cosmological simulation
+Real Grid3D::Calc_Particles_dt_Cosmo_function(part_int_t p_start, part_int_t p_end)
+{
   part_int_t pID;
   Real da, da_min, vel, dt_min;
-  da_min = 1e100;
-  Real scale_factor = 1 / ( Cosmo.current_a * Cosmo.Get_Hubble_Parameter( Cosmo.current_a) ) * Cosmo.cosmo_h;
-  Real a2 = ( Cosmo.current_a )*( Cosmo.current_a  );
+  da_min            = 1e100;
+  Real scale_factor = 1 / (Cosmo.current_a * Cosmo.Get_Hubble_Parameter(Cosmo.current_a)) * Cosmo.cosmo_h;
+  Real a2           = (Cosmo.current_a) * (Cosmo.current_a);
 
   Real vel_factor;
   vel_factor = Cosmo.current_a / scale_factor;
@@ -358,63 +367,62 @@ Real Grid3D::Calc_Particles_dt_Cosmo_function( part_int_t p_start, part_int_t p_
   vy_max = 0;
   vz_max = 0;
 
-  for ( pID=p_start; pID<p_end; pID++ ){
-    vx_max = fmax( vx_max,  fabs(Particles.vel_x[pID]) );
-    vy_max = fmax( vy_max,  fabs(Particles.vel_y[pID]) );
-    vz_max = fmax( vz_max,  fabs(Particles.vel_z[pID]) );
+  for (pID = p_start; pID < p_end; pID++) {
+    vx_max = fmax(vx_max, fabs(Particles.vel_x[pID]));
+    vy_max = fmax(vy_max, fabs(Particles.vel_y[pID]));
+    vz_max = fmax(vz_max, fabs(Particles.vel_z[pID]));
   }
 
-  da_min = fmin( Particles.G.dx / vx_max, Particles.G.dy / vy_max  );
-  da_min = fmin( Particles.G.dz / vz_max, da_min  );
+  da_min = fmin(Particles.G.dx / vx_max, Particles.G.dy / vy_max);
+  da_min = fmin(Particles.G.dz / vz_max, da_min);
   da_min *= vel_factor;
-  dt_min = Cosmo.Get_dt_from_da( da_min );
+  dt_min = Cosmo.Get_dt_from_da(da_min);
   return Particles.C_cfl * dt_min;
 }
 
-
-//Update positions and velocities (step 1 of KDK scheme ) COSMOLOGICAL SIMULATION
-void Grid3D::Advance_Particles_KDK_Cosmo_Step1_function( part_int_t p_start, part_int_t p_end ){
-
+// Update positions and velocities (step 1 of KDK scheme ) COSMOLOGICAL
+// SIMULATION
+void Grid3D::Advance_Particles_KDK_Cosmo_Step1_function(part_int_t p_start, part_int_t p_end)
+{
   Real dt, dt_half;
   part_int_t pIndx;
-  Real a = Cosmo.current_a;
-  Real da = Cosmo.delta_a;
-  Real da_half = da/2;
-  Real a_half = a + da_half;
+  Real a       = Cosmo.current_a;
+  Real da      = Cosmo.delta_a;
+  Real da_half = da / 2;
+  Real a_half  = a + da_half;
 
   Real H, H_half;
-  H = Cosmo.Get_Hubble_Parameter( a );
-  H_half = Cosmo.Get_Hubble_Parameter( a_half );
+  H      = Cosmo.Get_Hubble_Parameter(a);
+  H_half = Cosmo.Get_Hubble_Parameter(a_half);
 
-  dt = da / ( a * H ) * Cosmo.cosmo_h;
-  dt_half = da / ( a_half * H_half ) * Cosmo.cosmo_h / ( a_half );
+  dt      = da / (a * H) * Cosmo.cosmo_h;
+  dt_half = da / (a_half * H_half) * Cosmo.cosmo_h / (a_half);
 
   Real pos_x, vel_x, grav_x;
   Real pos_y, vel_y, grav_y;
   Real pos_z, vel_z, grav_z;
-  for ( pIndx=p_start; pIndx<p_end; pIndx++ ){
-    pos_x = Particles.pos_x[pIndx];
-    pos_y = Particles.pos_y[pIndx];
-    pos_z = Particles.pos_z[pIndx];
-    vel_x = Particles.vel_x[pIndx];
-    vel_y = Particles.vel_y[pIndx];
-    vel_z = Particles.vel_z[pIndx];
+  for (pIndx = p_start; pIndx < p_end; pIndx++) {
+    pos_x  = Particles.pos_x[pIndx];
+    pos_y  = Particles.pos_y[pIndx];
+    pos_z  = Particles.pos_z[pIndx];
+    vel_x  = Particles.vel_x[pIndx];
+    vel_y  = Particles.vel_y[pIndx];
+    vel_z  = Particles.vel_z[pIndx];
     grav_x = Particles.grav_x[pIndx];
     grav_y = Particles.grav_y[pIndx];
     grav_z = Particles.grav_z[pIndx];
 
     // Advance velocities by half a step
-    vel_x = ( a*vel_x + 0.5*dt*grav_x ) / a_half;
-    vel_y = ( a*vel_y + 0.5*dt*grav_y ) / a_half;
-    vel_z = ( a*vel_z + 0.5*dt*grav_z ) / a_half;
+    vel_x = (a * vel_x + 0.5 * dt * grav_x) / a_half;
+    vel_y = (a * vel_y + 0.5 * dt * grav_y) / a_half;
+    vel_z = (a * vel_z + 0.5 * dt * grav_z) / a_half;
 
-    //Advance the positions by delta_t using the updated velocities
+    // Advance the positions by delta_t using the updated velocities
     pos_x += dt_half * vel_x;
     pos_y += dt_half * vel_y;
     pos_z += dt_half * vel_z;
 
-
-    //Save the updated positions and velocities
+    // Save the updated positions and velocities
     Particles.pos_x[pIndx] = pos_x;
     Particles.pos_y[pIndx] = pos_y;
     Particles.pos_z[pIndx] = pos_z;
@@ -425,20 +433,21 @@ void Grid3D::Advance_Particles_KDK_Cosmo_Step1_function( part_int_t p_start, par
   }
 }
 
-//Update velocities (step 2 of KDK scheme ) COSMOLOGICAL SIMULATION
-void Grid3D::Advance_Particles_KDK_Cosmo_Step2_function( part_int_t p_start, part_int_t p_end ){
+// Update velocities (step 2 of KDK scheme ) COSMOLOGICAL SIMULATION
+void Grid3D::Advance_Particles_KDK_Cosmo_Step2_function(part_int_t p_start, part_int_t p_end)
+{
   Real dt;
   part_int_t pIndx;
-  Real a = Cosmo.current_a;
-  Real da = Cosmo.delta_a;
+  Real a       = Cosmo.current_a;
+  Real da      = Cosmo.delta_a;
   Real da_half = da / 2;
-  Real a_half = a - da + da_half;
+  Real a_half  = a - da + da_half;
 
-  dt = da / ( a * Cosmo.Get_Hubble_Parameter( a ) ) * Cosmo.cosmo_h;
+  dt = da / (a * Cosmo.Get_Hubble_Parameter(a)) * Cosmo.cosmo_h;
 
   Real grav_x, grav_y, grav_z;
   Real vel_x, vel_y, vel_z;
-  for ( pIndx=p_start; pIndx<p_end; pIndx++ ){
+  for (pIndx = p_start; pIndx < p_end; pIndx++) {
     grav_x = Particles.grav_x[pIndx];
     grav_y = Particles.grav_y[pIndx];
     grav_z = Particles.grav_z[pIndx];
@@ -448,29 +457,14 @@ void Grid3D::Advance_Particles_KDK_Cosmo_Step2_function( part_int_t p_start, par
     vel_z = Particles.vel_z[pIndx];
 
     // Advance velocities by half a step
-    Particles.vel_x[pIndx] = ( a_half*vel_x + 0.5*dt*grav_x ) / a;
-    Particles.vel_y[pIndx] = ( a_half*vel_y + 0.5*dt*grav_y ) / a;
-    Particles.vel_z[pIndx] = ( a_half*vel_z + 0.5*dt*grav_z ) / a;
-
+    Particles.vel_x[pIndx] = (a_half * vel_x + 0.5 * dt * grav_x) / a;
+    Particles.vel_y[pIndx] = (a_half * vel_y + 0.5 * dt * grav_y) / a;
+    Particles.vel_z[pIndx] = (a_half * vel_z + 0.5 * dt * grav_z) / a;
   }
 }
 
+    #endif  // PARTICLES_CPU
 
-#endif //PARTICLES_CPU
-
-
-
-#endif //COSMOLOGY
-
-
-
-
-
-
-
-
-
-
-
+  #endif  // COSMOLOGY
 
-#endif//PARTICLES
+#endif  // PARTICLES
diff --git a/src/particles/particles_dynamics_gpu.cu b/src/particles/particles_dynamics_gpu.cu
index d1fd1614b..817040dca 100644
--- a/src/particles/particles_dynamics_gpu.cu
+++ b/src/particles/particles_dynamics_gpu.cu
@@ -1,35 +1,34 @@
 #if defined(PARTICLES) && defined(PARTICLES_GPU)
 
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include "../utils/gpu.hpp"
-#include "../global/global.h"
-#include "../global/global_cuda.h"
-#include "../grid/grid3D.h"
-#include "../io/io.h"
-#include "../particles/particles_3D.h"
-
-#ifdef COSMOLOGY
-#include "../cosmology/cosmology.h"
+  #include <math.h>
+  #include <stdio.h>
+  #include <stdlib.h>
+  #include <unistd.h>
+
+  #include "../global/global.h"
+  #include "../global/global_cuda.h"
+  #include "../grid/grid3D.h"
+  #include "../io/io.h"
+  #include "../utils/gpu.hpp"
+  #include "particles_3D.h"
+
+  #ifdef COSMOLOGY
+    #include "../cosmology/cosmology.h"
 // #include "../cosmology/cosmology_functions_gpu.h"
 
-// FUTURE FIX: The Hubble function was defined here because I couldn't get it form other file, tried -dc flag when compiling buu paris broke.
-__device__ Real Get_Hubble_Parameter_dev( Real a, Real H0, Real Omega_M, Real Omega_L, Real Omega_K ){
-  Real a2 = a * a;
-  Real a3 = a2 * a;
-  Real factor = ( Omega_M/a3 + Omega_K/a2 + Omega_L );
+// FUTURE FIX: The Hubble function was defined here because I couldn't get it
+// form other file, tried -dc flag when compiling buu paris broke.
+__device__ Real Get_Hubble_Parameter_dev(Real a, Real H0, Real Omega_M, Real Omega_L, Real Omega_K)
+{
+  Real a2     = a * a;
+  Real a3     = a2 * a;
+  Real factor = (Omega_M / a3 + Omega_K / a2 + Omega_L);
   return H0 * sqrt(factor);
-
 }
-#endif
-
+  #endif
 
-
-
-
-__global__ void Calc_Particles_dti_Kernel( part_int_t n_local, Real dx, Real dy, Real dz, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *dti_array )
+__global__ void Calc_Particles_dti_Kernel(part_int_t n_local, Real dx, Real dy, Real dz, Real *vel_x_dev,
+                                          Real *vel_y_dev, Real *vel_z_dev, Real *dti_array)
 {
   __shared__ Real max_dti[TPB_PARTICLES];
 
@@ -37,7 +36,7 @@ __global__ void Calc_Particles_dti_Kernel( part_int_t n_local, Real dx, Real dy,
   int tid;
 
   // get a global thread ID
-  id = blockIdx.x * blockDim.x + threadIdx.x ;
+  id = blockIdx.x * blockDim.x + threadIdx.x;
   // and a thread id within the block
   tid = threadIdx.x;
 
@@ -50,39 +49,38 @@ __global__ void Calc_Particles_dti_Kernel( part_int_t n_local, Real dx, Real dy,
   // if( tid == 0 ) printf("%f  %f  %f \n", dx, dy, dz );
 
   // threads corresponding to real cells do the calculation
-  if (id < n_local ){
+  if (id < n_local) {
     // every thread collects the variables it needs from global memory
-    vx =  vel_x_dev[id];
-    vy =  vel_y_dev[id];
-    vz =  vel_z_dev[id];
-    max_dti[tid] = fmax( fabs(vx)/dx, fabs(vy)/dy);
-    max_dti[tid] = fmax( max_dti[tid], fabs(vz)/dz);
-    max_dti[tid] = fmax( max_dti[tid], 0.0);
+    vx           = vel_x_dev[id];
+    vy           = vel_y_dev[id];
+    vz           = vel_z_dev[id];
+    max_dti[tid] = fmax(fabs(vx) / dx, fabs(vy) / dy);
+    max_dti[tid] = fmax(max_dti[tid], fabs(vz) / dz);
+    max_dti[tid] = fmax(max_dti[tid], 0.0);
   }
   __syncthreads();
 
-  // do the reduction in shared memory (find the max inverse timestep in the block)
-  for (unsigned int s=1; s<blockDim.x; s*=2) {
-    if (tid % (2*s) == 0) {
+  // do the reduction in shared memory (find the max inverse timestep in the
+  // block)
+  for (unsigned int s = 1; s < blockDim.x; s *= 2) {
+    if (tid % (2 * s) == 0) {
       max_dti[tid] = fmax(max_dti[tid], max_dti[tid + s]);
     }
     __syncthreads();
   }
 
   // write the result for this block to global memory
-  if (tid == 0) dti_array[blockIdx.x] = max_dti[0];
-
+  if (tid == 0) {
+    dti_array[blockIdx.x] = max_dti[0];
+  }
 }
 
-
-
-
-
-Real Particles_3D::Calc_Particles_dt_GPU_function( int ngrid, part_int_t n_particles_local, Real dx, Real dy, Real dz, Real *vel_x, Real *vel_y, Real *vel_z, Real *dti_array_host, Real *dti_array_dev ){
-
-
+Real Particles3D::Calc_Particles_dt_GPU_function(int ngrid, part_int_t n_particles_local, Real dx, Real dy, Real dz,
+                                                 Real *vel_x, Real *vel_y, Real *vel_z, Real *dti_array_host,
+                                                 Real *dti_array_dev)
+{
   // // set values for GPU kernels
-  // int ngrid =  (Particles.n_local + TPB_PARTICLES - 1) / TPB_PARTICLES;
+  // int ngrid =  (Particles.n_local - 1) / TPB_PARTICLES + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(ngrid, 1, 1);
   //  number of threads per 1D block
@@ -95,61 +93,64 @@ Real Particles_3D::Calc_Particles_dt_GPU_function( int ngrid, part_int_t n_parti
     return 0;
   }
 
-  hipLaunchKernelGGL(Calc_Particles_dti_Kernel, dim1dGrid, dim1dBlock, 0, 0,  n_particles_local, dx, dy, dz, vel_x, vel_y, vel_z, dti_array_dev );
-  CudaCheckError();
+  hipLaunchKernelGGL(Calc_Particles_dti_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_particles_local, dx, dy, dz, vel_x,
+                     vel_y, vel_z, dti_array_dev);
+  GPU_Error_Check();
 
   // Initialize dt values
   Real max_dti = 0;
   // copy the dti array onto the CPU
-  CudaSafeCall( cudaMemcpy(dti_array_host, dti_array_dev, ngrid*sizeof(Real), cudaMemcpyDeviceToHost) );
+  GPU_Error_Check(cudaMemcpy(dti_array_host, dti_array_dev, ngrid * sizeof(Real), cudaMemcpyDeviceToHost));
   // find maximum inverse timestep from CFL condition
-  for (int i=0; i<ngrid; i++) {
+  for (int i = 0; i < ngrid; i++) {
     max_dti = fmax(max_dti, dti_array_host[i]);
   }
 
   return max_dti;
-
-
-
 }
 
-
-
-
-__global__ void Advance_Particles_KDK_Step1_Kernel( part_int_t n_local, Real dt, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev ){
-
-  part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x ;
-  if ( tid >= n_local) return;
+__global__ void Advance_Particles_KDK_Step1_Kernel(part_int_t n_local, Real dt, Real *pos_x_dev, Real *pos_y_dev,
+                                                   Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev,
+                                                   Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev)
+{
+  part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x;
+  if (tid >= n_local) {
+    return;
+  }
 
   // Advance velocities by half a step
   vel_x_dev[tid] += 0.5 * dt * grav_x_dev[tid];
   vel_y_dev[tid] += 0.5 * dt * grav_y_dev[tid];
   vel_z_dev[tid] += 0.5 * dt * grav_z_dev[tid];
 
-  //Advance Positions using advanced velocities
+  // Advance Positions using advanced velocities
   pos_x_dev[tid] += dt * vel_x_dev[tid];
   pos_y_dev[tid] += dt * vel_y_dev[tid];
   pos_z_dev[tid] += dt * vel_z_dev[tid];
 }
 
-
-__global__ void Advance_Particles_KDK_Step2_Kernel( part_int_t n_local, Real dt, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev ){
-
-  part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x ;
-  if ( tid >= n_local) return;
+__global__ void Advance_Particles_KDK_Step2_Kernel(part_int_t n_local, Real dt, Real *vel_x_dev, Real *vel_y_dev,
+                                                   Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev,
+                                                   Real *grav_z_dev)
+{
+  part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x;
+  if (tid >= n_local) {
+    return;
+  }
 
   // Advance velocities by the second half a step
   vel_x_dev[tid] += 0.5 * dt * grav_x_dev[tid];
   vel_y_dev[tid] += 0.5 * dt * grav_y_dev[tid];
   vel_z_dev[tid] += 0.5 * dt * grav_z_dev[tid];
-
 }
 
-
-void Particles_3D::Advance_Particles_KDK_Step1_GPU_function( part_int_t n_local, Real dt, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev  ){
-
+void Particles3D::Advance_Particles_KDK_Step1_GPU_function(part_int_t n_local, Real dt, Real *pos_x_dev,
+                                                           Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev,
+                                                           Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev,
+                                                           Real *grav_y_dev, Real *grav_z_dev)
+{
   // set values for GPU kernels
-  int ngrid =  (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES;
+  int ngrid = (n_local - 1) / TPB_PARTICLES + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(ngrid, 1, 1);
   //  number of threads per 1D block
@@ -157,16 +158,18 @@ void Particles_3D::Advance_Particles_KDK_Step1_GPU_function( part_int_t n_local,
 
   // Only runs if there are local particles
   if (n_local > 0) {
-    hipLaunchKernelGGL(Advance_Particles_KDK_Step1_Kernel, dim1dGrid, dim1dBlock, 0, 0,  n_local, dt, pos_x_dev, pos_y_dev, pos_z_dev, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev );
-    CudaCheckError();
+    hipLaunchKernelGGL(Advance_Particles_KDK_Step1_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, dt, pos_x_dev,
+                       pos_y_dev, pos_z_dev, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev);
+    GPU_Error_Check();
   }
 }
 
-
-void Particles_3D::Advance_Particles_KDK_Step2_GPU_function( part_int_t n_local, Real dt,  Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev  ){
-
+void Particles3D::Advance_Particles_KDK_Step2_GPU_function(part_int_t n_local, Real dt, Real *vel_x_dev,
+                                                           Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev,
+                                                           Real *grav_y_dev, Real *grav_z_dev)
+{
   // set values for GPU kernels
-  int ngrid =  (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES;
+  int ngrid = (n_local - 1) / TPB_PARTICLES + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(ngrid, 1, 1);
   //  number of threads per 1D block
@@ -174,35 +177,39 @@ void Particles_3D::Advance_Particles_KDK_Step2_GPU_function( part_int_t n_local,
 
   // Only runs if there are local particles
   if (n_local > 0) {
-    hipLaunchKernelGGL(Advance_Particles_KDK_Step2_Kernel, dim1dGrid, dim1dBlock, 0, 0,  n_local, dt, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev );
-    CudaCheckError();
+    hipLaunchKernelGGL(Advance_Particles_KDK_Step2_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, dt, vel_x_dev,
+                       vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev);
+    GPU_Error_Check();
   }
 }
 
+  #ifdef COSMOLOGY
 
-#ifdef COSMOLOGY
-
-
-__global__ void Advance_Particles_KDK_Step1_Cosmo_Kernel( part_int_t n_local, Real da, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K ){
-
-  part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x ;
-  if ( tid >= n_local) return;
+__global__ void Advance_Particles_KDK_Step1_Cosmo_Kernel(part_int_t n_local, Real da, Real *pos_x_dev, Real *pos_y_dev,
+                                                         Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev,
+                                                         Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev,
+                                                         Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h,
+                                                         Real Omega_M, Real Omega_L, Real Omega_K)
+{
+  part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x;
+  if (tid >= n_local) {
+    return;
+  }
 
   Real vel_x, vel_y, vel_z;
   vel_x = vel_x_dev[tid];
   vel_y = vel_y_dev[tid];
   vel_z = vel_z_dev[tid];
 
-
   Real da_half, a_half, H, H_half, dt, dt_half;
-  da_half = da/2;
-  a_half = current_a + da_half;
+  da_half = da / 2;
+  a_half  = current_a + da_half;
 
-  H = Get_Hubble_Parameter_dev( current_a, H0, Omega_M, Omega_L, Omega_K );
-  H_half = Get_Hubble_Parameter_dev( a_half, H0, Omega_M, Omega_L, Omega_K );
+  H      = Get_Hubble_Parameter_dev(current_a, H0, Omega_M, Omega_L, Omega_K);
+  H_half = Get_Hubble_Parameter_dev(a_half, H0, Omega_M, Omega_L, Omega_K);
 
-  dt = da / ( current_a * H ) * cosmo_h;
-  dt_half = da / ( a_half * H_half ) * cosmo_h / ( a_half );
+  dt      = da / (current_a * H) * cosmo_h;
+  dt_half = da / (a_half * H_half) * cosmo_h / (a_half);
 
   // if ( tid == 0 ) printf( "dt: %f\n", dt);
   // if ( tid == 0 ) printf( "pos_x: %f\n", pos_x_dev[tid]);
@@ -210,24 +217,28 @@ __global__ void Advance_Particles_KDK_Step1_Cosmo_Kernel( part_int_t n_local, Re
   // if ( tid == 0 ) printf( "grav_x: %f\n", grav_x_dev[tid]);
 
   // Advance velocities by half a step
-  vel_x = ( current_a*vel_x + 0.5*dt*grav_x_dev[tid] ) / a_half;
-  vel_y = ( current_a*vel_y + 0.5*dt*grav_y_dev[tid] ) / a_half;
-  vel_z = ( current_a*vel_z + 0.5*dt*grav_z_dev[tid] ) / a_half;
+  vel_x          = (current_a * vel_x + 0.5 * dt * grav_x_dev[tid]) / a_half;
+  vel_y          = (current_a * vel_y + 0.5 * dt * grav_y_dev[tid]) / a_half;
+  vel_z          = (current_a * vel_z + 0.5 * dt * grav_z_dev[tid]) / a_half;
   vel_x_dev[tid] = vel_x;
   vel_y_dev[tid] = vel_y;
   vel_z_dev[tid] = vel_z;
 
-  //Advance Positions using advanced velocities
+  // Advance Positions using advanced velocities
   pos_x_dev[tid] += dt_half * vel_x;
   pos_y_dev[tid] += dt_half * vel_y;
   pos_z_dev[tid] += dt_half * vel_z;
 }
 
-
-__global__ void Advance_Particles_KDK_Step2_Cosmo_Kernel( part_int_t n_local, Real da, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K ){
-
-  part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x ;
-  if ( tid >= n_local) return;
+__global__ void Advance_Particles_KDK_Step2_Cosmo_Kernel(part_int_t n_local, Real da, Real *vel_x_dev, Real *vel_y_dev,
+                                                         Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev,
+                                                         Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h,
+                                                         Real Omega_M, Real Omega_L, Real Omega_K)
+{
+  part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x;
+  if (tid >= n_local) {
+    return;
+  }
 
   Real vel_x, vel_y, vel_z;
   vel_x = vel_x_dev[tid];
@@ -235,23 +246,26 @@ __global__ void Advance_Particles_KDK_Step2_Cosmo_Kernel( part_int_t n_local, Re
   vel_z = vel_z_dev[tid];
 
   Real da_half, a_half, dt;
-  da_half = da/2;
-  a_half = current_a - da_half;
+  da_half = da / 2;
+  a_half  = current_a - da_half;
 
-  dt = da / ( current_a * Get_Hubble_Parameter_dev( current_a, H0, Omega_M, Omega_L, Omega_K ) ) * cosmo_h;
+  dt = da / (current_a * Get_Hubble_Parameter_dev(current_a, H0, Omega_M, Omega_L, Omega_K)) * cosmo_h;
 
   // Advance velocities by the second half a step
-  vel_x_dev[tid] = ( a_half*vel_x + 0.5*dt*grav_x_dev[tid] ) / current_a;
-  vel_y_dev[tid] = ( a_half*vel_y + 0.5*dt*grav_y_dev[tid] ) / current_a;
-  vel_z_dev[tid] = ( a_half*vel_z + 0.5*dt*grav_z_dev[tid] ) / current_a;
-
+  vel_x_dev[tid] = (a_half * vel_x + 0.5 * dt * grav_x_dev[tid]) / current_a;
+  vel_y_dev[tid] = (a_half * vel_y + 0.5 * dt * grav_y_dev[tid]) / current_a;
+  vel_z_dev[tid] = (a_half * vel_z + 0.5 * dt * grav_z_dev[tid]) / current_a;
 }
 
-
-void Particles_3D::Advance_Particles_KDK_Step1_Cosmo_GPU_function( part_int_t n_local, Real delta_a, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K  ){
-
+void Particles3D::Advance_Particles_KDK_Step1_Cosmo_GPU_function(part_int_t n_local, Real delta_a, Real *pos_x_dev,
+                                                                 Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev,
+                                                                 Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev,
+                                                                 Real *grav_y_dev, Real *grav_z_dev, Real current_a,
+                                                                 Real H0, Real cosmo_h, Real Omega_M, Real Omega_L,
+                                                                 Real Omega_K)
+{
   // set values for GPU kernels
-  int ngrid =  (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES;
+  int ngrid = (n_local - 1) / TPB_PARTICLES + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(ngrid, 1, 1);
   //  number of threads per 1D block
@@ -259,19 +273,22 @@ void Particles_3D::Advance_Particles_KDK_Step1_Cosmo_GPU_function( part_int_t n_
 
   // Only runs if there are local particles
   if (n_local > 0) {
-    hipLaunchKernelGGL(Advance_Particles_KDK_Step1_Cosmo_Kernel, dim1dGrid, dim1dBlock, 0, 0,  n_local, delta_a, pos_x_dev, pos_y_dev, pos_z_dev, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, current_a, H0, cosmo_h, Omega_M, Omega_L, Omega_K );
-    CHECK(cudaDeviceSynchronize());
-  // CudaCheckError();
+    hipLaunchKernelGGL(Advance_Particles_KDK_Step1_Cosmo_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, delta_a,
+                       pos_x_dev, pos_y_dev, pos_z_dev, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev,
+                       grav_z_dev, current_a, H0, cosmo_h, Omega_M, Omega_L, Omega_K);
+    GPU_Error_Check(cudaDeviceSynchronize());
+    // GPU_Error_Check();
   }
-
 }
 
-
-
-void Particles_3D::Advance_Particles_KDK_Step2_Cosmo_GPU_function( part_int_t n_local, Real delta_a, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K  ){
-
+void Particles3D::Advance_Particles_KDK_Step2_Cosmo_GPU_function(part_int_t n_local, Real delta_a, Real *vel_x_dev,
+                                                                 Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev,
+                                                                 Real *grav_y_dev, Real *grav_z_dev, Real current_a,
+                                                                 Real H0, Real cosmo_h, Real Omega_M, Real Omega_L,
+                                                                 Real Omega_K)
+{
   // set values for GPU kernels
-  int ngrid =  (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES;
+  int ngrid = (n_local - 1) / TPB_PARTICLES + 1;
   // number of blocks per 1D grid
   dim3 dim1dGrid(ngrid, 1, 1);
   //  number of threads per 1D block
@@ -279,15 +296,14 @@ void Particles_3D::Advance_Particles_KDK_Step2_Cosmo_GPU_function( part_int_t n_
 
   // Only runs if there are local particles
   if (n_local > 0) {
-    hipLaunchKernelGGL(Advance_Particles_KDK_Step2_Cosmo_Kernel, dim1dGrid, dim1dBlock, 0, 0,  n_local, delta_a, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, current_a, H0, cosmo_h, Omega_M, Omega_L, Omega_K );
-    CHECK(cudaDeviceSynchronize());
-  // CudaCheckError();
+    hipLaunchKernelGGL(Advance_Particles_KDK_Step2_Cosmo_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, delta_a,
+                       vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, current_a, H0, cosmo_h,
+                       Omega_M, Omega_L, Omega_K);
+    GPU_Error_Check(cudaDeviceSynchronize());
+    // GPU_Error_Check();
   }
 }
 
-#endif //COSMOLOGY
-
-
-
+  #endif  // COSMOLOGY
 
 #endif
diff --git a/src/particles/starburst99_snr.txt b/src/particles/starburst99_snr.txt
new file mode 100755
index 000000000..449821025
--- /dev/null
+++ b/src/particles/starburst99_snr.txt
@@ -0,0 +1,1007 @@
+ MODEL DESIGNATION: MW_center           
+ MODEL GENERATED: Mon Nov 28 15:05:08 2022
+
+              RESULTS FOR THE SUPERNOVA RATE
+
+                     ALL SUPERNOVAE               TYPE IB SUPERNOVAE               ALL SUPERNOVAE           STARS + SUPERNOVAE
+    TIME       TOTAL RATE  POWER   ENERGY    TOTAL RATE  POWER   ENERGY   TYPICAL MASS   LOWEST PROG. MASS    POWER   ENERGY
+   0.100E+05    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           39.937   51.436
+   0.110E+06    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           39.943   52.483
+   0.210E+06    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           39.950   52.767
+   0.310E+06    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           39.956   52.940
+   0.410E+06    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           39.962   53.064
+   0.510E+06    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           39.968   53.162
+   0.610E+06    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           39.974   53.243
+   0.710E+06    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           39.979   53.312
+   0.810E+06    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           39.985   53.372
+   0.910E+06    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           39.990   53.426
+   0.101E+07    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           39.996   53.474
+   0.111E+07    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           40.001   53.518
+   0.121E+07    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           40.005   53.558
+   0.131E+07    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           40.010   53.595
+   0.141E+07    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           40.013   53.629
+   0.151E+07    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           40.015   53.662
+   0.161E+07    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           40.016   53.691
+   0.171E+07    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           40.016   53.719
+   0.181E+07    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           40.013   53.746
+   0.191E+07    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           40.007   53.770
+   0.201E+07    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           39.997   53.792
+   0.211E+07    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           39.983   53.813
+   0.221E+07    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           39.962   53.832
+   0.231E+07    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           39.932   53.849
+   0.241E+07    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           39.946   53.866
+   0.251E+07    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           39.970   53.883
+   0.261E+07    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           40.016   53.901
+   0.271E+07    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           40.026   53.919
+   0.281E+07    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           40.009   53.936
+   0.291E+07    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           40.094   53.955
+   0.301E+07    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           40.202   53.978
+   0.311E+07    -30.000  -30.000  -30.000     -30.000  -30.000  -30.000        0.0             0.0           40.265   54.004
+   0.321E+07     -4.388   39.113   51.612      -4.388   39.113   51.612       99.7            99.3           40.333   54.032
+   0.331E+07     -3.308   40.193   52.727      -3.308   40.193   52.727       96.2            91.5           40.545   54.075
+   0.341E+07     -3.317   40.184   53.007      -3.317   40.184   53.007       88.8            84.8           40.534   54.113
+   0.351E+07     -3.306   40.195   53.179      -3.306   40.195   53.179       82.5            79.0           40.509   54.145
+   0.361E+07     -3.296   40.205   53.304      -3.296   40.205   53.304       77.0            73.9           40.484   54.174
+   0.371E+07     -3.266   40.235   53.408      -3.266   40.235   53.408       72.1            69.5           40.482   54.201
+   0.381E+07     -3.277   40.223   53.489      -3.277   40.223   53.489       67.9            65.5           40.454   54.225
+   0.391E+07     -3.269   40.232   53.559      -3.269   40.232   53.559       64.1            61.9           40.402   54.245
+   0.401E+07     -3.266   40.235   53.620      -3.266   40.235   53.620       60.7            58.8           40.372   54.263
+   0.411E+07     -3.269   40.232   53.672      -3.269   40.232   53.672       57.7            56.0           40.349   54.280
+   0.421E+07     -3.265   40.236   53.720      -3.265   40.236   53.720       55.0            53.5           40.335   54.295
+   0.431E+07     -3.262   40.239   53.763      -3.262   40.239   53.763       52.5            51.2           40.333   54.310
+   0.441E+07     -3.258   40.243   53.803      -3.258   40.243   53.803       50.3            49.1           40.328   54.324
+   0.451E+07     -3.255   40.246   53.839      -3.255   40.246   53.839       48.3            47.1           40.324   54.337
+   0.461E+07     -3.251   40.249   53.873      -3.251   40.249   53.873       46.4            45.4           40.319   54.350
+   0.471E+07     -3.227   40.274   53.906      -3.495   40.006   53.891       44.7            43.7           40.332   54.363
+   0.481E+07     -3.245   40.255   53.936     -30.000  -30.000   53.891       43.1            42.2           40.312   54.375
+   0.491E+07     -3.243   40.258   53.964     -30.000  -30.000   53.891       41.7            40.8           40.313   54.387
+   0.501E+07     -3.249   40.252   53.989     -30.000  -30.000   53.891       40.3            39.5           40.301   54.398
+   0.511E+07     -3.263   40.238   54.013     -30.000  -30.000   53.891       39.1            38.4           40.283   54.408
+   0.521E+07     -3.264   40.237   54.035     -30.000  -30.000   53.891       37.9            37.3           40.275   54.418
+   0.531E+07     -3.243   40.258   54.058     -30.000  -30.000   53.891       36.9            36.3           40.291   54.428
+   0.541E+07     -3.266   40.235   54.078     -30.000  -30.000   53.891       35.9            35.3           40.269   54.438
+   0.551E+07     -3.267   40.234   54.097      -3.693   39.808   53.902       35.0            34.4           40.266   54.447
+   0.561E+07     -3.267   40.234   54.115      -3.267   40.234   53.931       34.1            33.6           40.263   54.456
+   0.571E+07     -3.268   40.233   54.133      -3.268   40.233   53.957       33.3            32.8           40.258   54.464
+   0.581E+07     -3.269   40.232   54.150      -3.269   40.232   53.982       32.5            32.0           40.255   54.473
+   0.591E+07     -3.267   40.234   54.166      -3.267   40.234   54.006       31.7            31.3           40.254   54.481
+   0.601E+07     -3.267   40.234   54.182      -3.267   40.234   54.029       31.0            30.6           40.250   54.489
+   0.611E+07     -3.268   40.233   54.197      -3.268   40.233   54.050       30.4            30.0           40.250   54.497
+   0.621E+07     -3.268   40.233   54.212      -3.268   40.233   54.071       29.7            29.4           40.250   54.505
+   0.631E+07     -3.268   40.233   54.226      -3.268   40.233   54.090       29.1            28.8           40.250   54.512
+   0.641E+07     -3.269   40.232   54.240      -3.269   40.232   54.109       28.6            28.2           40.246   54.519
+   0.651E+07     -3.269   40.232   54.253      -3.269   40.232   54.126       28.0            27.7           40.245   54.527
+   0.661E+07     -3.269   40.232   54.266      -3.404   40.097   54.139       27.5            27.2           40.244   54.534
+   0.671E+07     -3.270   40.231   54.278     -30.000  -30.000   54.139       27.0            26.7           40.243   54.541
+   0.681E+07     -3.270   40.231   54.290     -30.000  -30.000   54.139       26.5            26.2           40.242   54.548
+   0.691E+07     -3.271   40.230   54.302     -30.000  -30.000   54.139       26.0            25.8           40.241   54.554
+   0.701E+07     -3.271   40.230   54.314     -30.000  -30.000   54.139       25.6            25.3           40.241   54.561
+   0.711E+07     -3.273   40.228   54.325     -30.000  -30.000   54.139       25.2            24.9           40.239   54.567
+   0.721E+07     -3.280   40.221   54.335     -30.000  -30.000   54.139       24.8            24.5           40.232   54.574
+   0.731E+07     -3.282   40.219   54.346     -30.000  -30.000   54.139       24.4            24.1           40.231   54.580
+   0.741E+07     -3.283   40.218   54.356     -30.000  -30.000   54.139       24.0            23.8           40.229   54.586
+   0.751E+07     -3.285   40.216   54.366     -30.000  -30.000   54.139       23.6            23.4           40.228   54.592
+   0.761E+07     -3.286   40.215   54.375     -30.000  -30.000   54.139       23.3            23.1           40.227   54.598
+   0.771E+07     -3.287   40.214   54.385     -30.000  -30.000   54.139       23.0            22.8           40.225   54.604
+   0.781E+07     -3.288   40.212   54.394     -30.000  -30.000   54.139       22.6            22.5           40.224   54.609
+   0.791E+07     -3.290   40.211   54.403     -30.000  -30.000   54.139       22.3            22.2           40.223   54.615
+   0.801E+07     -3.291   40.210   54.411     -30.000  -30.000   54.139       22.0            21.9           40.221   54.620
+   0.811E+07     -3.292   40.208   54.420     -30.000  -30.000   54.139       21.7            21.6           40.219   54.626
+   0.821E+07     -3.294   40.207   54.428     -30.000  -30.000   54.139       21.5            21.3           40.216   54.631
+   0.831E+07     -3.295   40.206   54.436     -30.000  -30.000   54.139       21.2            21.0           40.214   54.636
+   0.841E+07     -3.296   40.204   54.444     -30.000  -30.000   54.139       20.9            20.8           40.212   54.641
+   0.851E+07     -3.298   40.203   54.452     -30.000  -30.000   54.139       20.7            20.5           40.210   54.646
+   0.861E+07     -3.299   40.202   54.460     -30.000  -30.000   54.139       20.4            20.3           40.208   54.651
+   0.871E+07     -3.300   40.201   54.467     -30.000  -30.000   54.139       20.2            20.0           40.207   54.656
+   0.881E+07     -3.328   40.173   54.474     -30.000  -30.000   54.139       20.0            19.8           40.179   54.661
+   0.891E+07     -3.340   40.161   54.481     -30.000  -30.000   54.139       19.8            19.6           40.167   54.665
+   0.901E+07     -3.344   40.157   54.487     -30.000  -30.000   54.139       19.6            19.4           40.162   54.669
+   0.911E+07     -3.348   40.153   54.493     -30.000  -30.000   54.139       19.4            19.2           40.158   54.674
+   0.921E+07     -3.352   40.149   54.500     -30.000  -30.000   54.139       19.2            19.1           40.154   54.678
+   0.931E+07     -3.356   40.145   54.506     -30.000  -30.000   54.139       19.0            18.9           40.149   54.682
+   0.941E+07     -3.360   40.141   54.511     -30.000  -30.000   54.139       18.8            18.7           40.145   54.686
+   0.951E+07     -3.363   40.137   54.517     -30.000  -30.000   54.139       18.6            18.5           40.141   54.690
+   0.961E+07     -3.367   40.134   54.523     -30.000  -30.000   54.139       18.5            18.4           40.137   54.694
+   0.971E+07     -3.371   40.130   54.528     -30.000  -30.000   54.139       18.3            18.2           40.134   54.697
+   0.981E+07     -3.374   40.127   54.534     -30.000  -30.000   54.139       18.1            18.1           40.130   54.701
+   0.991E+07     -3.378   40.123   54.539     -30.000  -30.000   54.139       18.0            17.9           40.126   54.705
+   0.100E+08     -3.381   40.120   54.544     -30.000  -30.000   54.139       17.8            17.8           40.123   54.708
+   0.101E+08     -3.385   40.116   54.549     -30.000  -30.000   54.139       17.7            17.6           40.119   54.712
+   0.102E+08     -3.388   40.113   54.554     -30.000  -30.000   54.139       17.6            17.5           40.116   54.715
+   0.103E+08     -3.391   40.110   54.559     -30.000  -30.000   54.139       17.4            17.3           40.112   54.719
+   0.104E+08     -3.394   40.107   54.564     -30.000  -30.000   54.139       17.3            17.2           40.109   54.722
+   0.105E+08     -3.398   40.103   54.569     -30.000  -30.000   54.139       17.2            17.1           40.106   54.725
+   0.106E+08     -3.401   40.100   54.573     -30.000  -30.000   54.139       17.0            16.9           40.102   54.728
+   0.107E+08     -3.404   40.097   54.578     -30.000  -30.000   54.139       16.9            16.8           40.099   54.732
+   0.108E+08     -3.407   40.094   54.582     -30.000  -30.000   54.139       16.8            16.7           40.096   54.735
+   0.109E+08     -3.410   40.091   54.587     -30.000  -30.000   54.139       16.7            16.6           40.093   54.738
+   0.110E+08     -3.413   40.088   54.591     -30.000  -30.000   54.139       16.5            16.5           40.090   54.741
+   0.111E+08     -3.416   40.085   54.595     -30.000  -30.000   54.139       16.4            16.4           40.087   54.744
+   0.112E+08     -3.418   40.083   54.600     -30.000  -30.000   54.139       16.3            16.3           40.084   54.747
+   0.113E+08     -3.421   40.080   54.604     -30.000  -30.000   54.139       16.2            16.1           40.081   54.750
+   0.114E+08     -3.424   40.077   54.608     -30.000  -30.000   54.139       16.1            16.0           40.078   54.753
+   0.115E+08     -3.427   40.074   54.612     -30.000  -30.000   54.139       16.0            15.9           40.076   54.756
+   0.116E+08     -3.430   40.071   54.616     -30.000  -30.000   54.139       15.9            15.8           40.073   54.759
+   0.117E+08     -3.432   40.069   54.620     -30.000  -30.000   54.139       15.8            15.7           40.070   54.761
+   0.118E+08     -3.435   40.066   54.623     -30.000  -30.000   54.139       15.7            15.6           40.067   54.764
+   0.119E+08     -3.438   40.063   54.627     -30.000  -30.000   54.139       15.6            15.5           40.064   54.767
+   0.120E+08     -3.440   40.061   54.631     -30.000  -30.000   54.139       15.5            15.5           40.062   54.770
+   0.121E+08     -3.443   40.058   54.634     -30.000  -30.000   54.139       15.4            15.4           40.059   54.772
+   0.122E+08     -3.445   40.056   54.638     -30.000  -30.000   54.139       15.3            15.3           40.056   54.775
+   0.123E+08     -3.448   40.053   54.642     -30.000  -30.000   54.139       15.2            15.2           40.054   54.777
+   0.124E+08     -3.450   40.051   54.645     -30.000  -30.000   54.139       15.1            15.1           40.051   54.780
+   0.125E+08     -3.453   40.048   54.649     -30.000  -30.000   54.139       15.1            15.0           40.049   54.783
+   0.126E+08     -3.454   40.047   54.652     -30.000  -30.000   54.139       15.0            14.9           40.048   54.785
+   0.127E+08     -3.456   40.045   54.655     -30.000  -30.000   54.139       14.9            14.8           40.045   54.788
+   0.128E+08     -3.458   40.042   54.659     -30.000  -30.000   54.139       14.8            14.8           40.043   54.790
+   0.129E+08     -3.461   40.040   54.662     -30.000  -30.000   54.139       14.7            14.7           40.041   54.792
+   0.130E+08     -3.463   40.038   54.665     -30.000  -30.000   54.139       14.7            14.6           40.038   54.795
+   0.131E+08     -3.465   40.035   54.668     -30.000  -30.000   54.139       14.6            14.5           40.036   54.797
+   0.132E+08     -3.468   40.033   54.672     -30.000  -30.000   54.139       14.5            14.5           40.034   54.800
+   0.133E+08     -3.470   40.031   54.675     -30.000  -30.000   54.139       14.4            14.4           40.032   54.802
+   0.134E+08     -3.472   40.029   54.678     -30.000  -30.000   54.139       14.4            14.3           40.029   54.804
+   0.135E+08     -3.474   40.027   54.681     -30.000  -30.000   54.139       14.3            14.2           40.027   54.807
+   0.136E+08     -3.477   40.024   54.684     -30.000  -30.000   54.139       14.2            14.2           40.025   54.809
+   0.137E+08     -3.479   40.022   54.687     -30.000  -30.000   54.139       14.1            14.1           40.023   54.811
+   0.138E+08     -3.481   40.020   54.690     -30.000  -30.000   54.139       14.1            14.0           40.021   54.813
+   0.139E+08     -3.483   40.018   54.693     -30.000  -30.000   54.139       14.0            14.0           40.019   54.815
+   0.140E+08     -3.485   40.016   54.696     -30.000  -30.000   54.139       13.9            13.9           40.016   54.818
+   0.141E+08     -3.487   40.014   54.698     -30.000  -30.000   54.139       13.9            13.8           40.014   54.820
+   0.142E+08     -3.489   40.012   54.701     -30.000  -30.000   54.139       13.8            13.8           40.012   54.822
+   0.143E+08     -3.491   40.010   54.704     -30.000  -30.000   54.139       13.7            13.7           40.010   54.824
+   0.144E+08     -3.493   40.008   54.707     -30.000  -30.000   54.139       13.7            13.7           40.008   54.826
+   0.145E+08     -3.495   40.006   54.709     -30.000  -30.000   54.139       13.6            13.6           40.006   54.828
+   0.146E+08     -3.497   40.004   54.712     -30.000  -30.000   54.139       13.6            13.5           40.004   54.830
+   0.147E+08     -3.499   40.002   54.715     -30.000  -30.000   54.139       13.5            13.5           40.002   54.832
+   0.148E+08     -3.501   40.000   54.717     -30.000  -30.000   54.139       13.4            13.4           40.000   54.834
+   0.149E+08     -3.503   39.998   54.720     -30.000  -30.000   54.139       13.4            13.4           39.998   54.836
+   0.150E+08     -3.505   39.996   54.723     -30.000  -30.000   54.139       13.3            13.3           39.996   54.838
+   0.151E+08     -3.507   39.994   54.725     -30.000  -30.000   54.139       13.3            13.2           39.994   54.840
+   0.152E+08     -3.509   39.992   54.728     -30.000  -30.000   54.139       13.2            13.2           39.992   54.842
+   0.153E+08     -3.511   39.990   54.730     -30.000  -30.000   54.139       13.2            13.1           39.990   54.844
+   0.154E+08     -3.513   39.988   54.733     -30.000  -30.000   54.139       13.1            13.1           39.988   54.846
+   0.155E+08     -3.515   39.986   54.735     -30.000  -30.000   54.139       13.1            13.0           39.986   54.848
+   0.156E+08     -3.517   39.984   54.738     -30.000  -30.000   54.139       13.0            13.0           39.985   54.850
+   0.157E+08     -3.518   39.983   54.740     -30.000  -30.000   54.139       13.0            12.9           39.983   54.852
+   0.158E+08     -3.520   39.981   54.742     -30.000  -30.000   54.139       12.9            12.9           39.981   54.853
+   0.159E+08     -3.522   39.979   54.745     -30.000  -30.000   54.139       12.9            12.8           39.979   54.855
+   0.160E+08     -3.524   39.977   54.747     -30.000  -30.000   54.139       12.8            12.8           39.977   54.857
+   0.161E+08     -3.526   39.975   54.749     -30.000  -30.000   54.139       12.8            12.7           39.975   54.859
+   0.162E+08     -3.528   39.973   54.752     -30.000  -30.000   54.139       12.7            12.7           39.974   54.861
+   0.163E+08     -3.529   39.972   54.754     -30.000  -30.000   54.139       12.7            12.6           39.972   54.862
+   0.164E+08     -3.531   39.970   54.756     -30.000  -30.000   54.139       12.6            12.6           39.970   54.864
+   0.165E+08     -3.533   39.968   54.758     -30.000  -30.000   54.139       12.6            12.5           39.968   54.866
+   0.166E+08     -3.535   39.966   54.761     -30.000  -30.000   54.139       12.5            12.5           39.967   54.868
+   0.167E+08     -3.536   39.965   54.763     -30.000  -30.000   54.139       12.5            12.4           39.965   54.869
+   0.168E+08     -3.538   39.963   54.765     -30.000  -30.000   54.139       12.4            12.4           39.963   54.871
+   0.169E+08     -3.540   39.961   54.767     -30.000  -30.000   54.139       12.4            12.4           39.961   54.873
+   0.170E+08     -3.541   39.960   54.769     -30.000  -30.000   54.139       12.3            12.3           39.960   54.874
+   0.171E+08     -3.543   39.958   54.771     -30.000  -30.000   54.139       12.3            12.3           39.958   54.876
+   0.172E+08     -3.545   39.956   54.773     -30.000  -30.000   54.139       12.3            12.2           39.956   54.878
+   0.173E+08     -3.546   39.955   54.775     -30.000  -30.000   54.139       12.2            12.2           39.955   54.879
+   0.174E+08     -3.548   39.953   54.778     -30.000  -30.000   54.139       12.2            12.1           39.953   54.881
+   0.175E+08     -3.550   39.951   54.780     -30.000  -30.000   54.139       12.1            12.1           39.951   54.883
+   0.176E+08     -3.551   39.950   54.782     -30.000  -30.000   54.139       12.1            12.1           39.950   54.884
+   0.177E+08     -3.553   39.948   54.784     -30.000  -30.000   54.139       12.0            12.0           39.948   54.886
+   0.178E+08     -3.554   39.947   54.786     -30.000  -30.000   54.139       12.0            12.0           39.947   54.887
+   0.179E+08     -3.554   39.947   54.788     -30.000  -30.000   54.139       12.0            11.9           39.947   54.889
+   0.180E+08     -3.556   39.945   54.790     -30.000  -30.000   54.139       11.9            11.9           39.945   54.890
+   0.181E+08     -3.557   39.944   54.791     -30.000  -30.000   54.139       11.9            11.9           39.944   54.892
+   0.182E+08     -3.559   39.942   54.793     -30.000  -30.000   54.139       11.8            11.8           39.942   54.894
+   0.183E+08     -3.560   39.941   54.795     -30.000  -30.000   54.139       11.8            11.8           39.941   54.895
+   0.184E+08     -3.562   39.939   54.797     -30.000  -30.000   54.139       11.8            11.8           39.939   54.897
+   0.185E+08     -3.563   39.938   54.799     -30.000  -30.000   54.139       11.7            11.7           39.938   54.898
+   0.186E+08     -3.565   39.936   54.801     -30.000  -30.000   54.139       11.7            11.7           39.936   54.900
+   0.187E+08     -3.566   39.935   54.803     -30.000  -30.000   54.139       11.7            11.6           39.935   54.901
+   0.188E+08     -3.568   39.933   54.805     -30.000  -30.000   54.139       11.6            11.6           39.933   54.903
+   0.189E+08     -3.569   39.932   54.807     -30.000  -30.000   54.139       11.6            11.6           39.932   54.904
+   0.190E+08     -3.571   39.930   54.808     -30.000  -30.000   54.139       11.6            11.5           39.931   54.905
+   0.191E+08     -3.572   39.929   54.810     -30.000  -30.000   54.139       11.5            11.5           39.929   54.907
+   0.192E+08     -3.573   39.928   54.812     -30.000  -30.000   54.139       11.5            11.5           39.928   54.908
+   0.193E+08     -3.575   39.926   54.814     -30.000  -30.000   54.139       11.5            11.4           39.926   54.910
+   0.194E+08     -3.576   39.924   54.816     -30.000  -30.000   54.139       11.4            11.4           39.925   54.911
+   0.195E+08     -3.578   39.923   54.817     -30.000  -30.000   54.139       11.4            11.4           39.923   54.913
+   0.196E+08     -3.579   39.922   54.819     -30.000  -30.000   54.139       11.4            11.3           39.922   54.914
+   0.197E+08     -3.581   39.920   54.821     -30.000  -30.000   54.139       11.3            11.3           39.920   54.915
+   0.198E+08     -3.582   39.919   54.822     -30.000  -30.000   54.139       11.3            11.3           39.919   54.917
+   0.199E+08     -3.583   39.918   54.824     -30.000  -30.000   54.139       11.3            11.2           39.918   54.918
+   0.200E+08     -3.585   39.916   54.826     -30.000  -30.000   54.139       11.2            11.2           39.916   54.919
+   0.201E+08     -3.586   39.915   54.828     -30.000  -30.000   54.139       11.2            11.2           39.915   54.921
+   0.202E+08     -3.587   39.913   54.829     -30.000  -30.000   54.139       11.2            11.1           39.914   54.922
+   0.203E+08     -3.589   39.912   54.831     -30.000  -30.000   54.139       11.1            11.1           39.912   54.924
+   0.204E+08     -3.590   39.911   54.832     -30.000  -30.000   54.139       11.1            11.1           39.911   54.925
+   0.205E+08     -3.592   39.909   54.834     -30.000  -30.000   54.139       11.1            11.1           39.909   54.926
+   0.206E+08     -3.593   39.908   54.836     -30.000  -30.000   54.139       11.0            11.0           39.908   54.927
+   0.207E+08     -3.594   39.907   54.837     -30.000  -30.000   54.139       11.0            11.0           39.907   54.929
+   0.208E+08     -3.595   39.905   54.839     -30.000  -30.000   54.139       11.0            11.0           39.906   54.930
+   0.209E+08     -3.597   39.904   54.841     -30.000  -30.000   54.139       10.9            10.9           39.904   54.931
+   0.210E+08     -3.598   39.903   54.842     -30.000  -30.000   54.139       10.9            10.9           39.903   54.933
+   0.211E+08     -3.599   39.901   54.844     -30.000  -30.000   54.139       10.9            10.9           39.902   54.934
+   0.212E+08     -3.601   39.900   54.845     -30.000  -30.000   54.139       10.9            10.8           39.900   54.935
+   0.213E+08     -3.602   39.899   54.847     -30.000  -30.000   54.139       10.8            10.8           39.899   54.936
+   0.214E+08     -3.603   39.897   54.848     -30.000  -30.000   54.139       10.8            10.8           39.898   54.938
+   0.215E+08     -3.604   39.896   54.850     -30.000  -30.000   54.139       10.8            10.8           39.897   54.939
+   0.216E+08     -3.606   39.895   54.851     -30.000  -30.000   54.139       10.8            10.7           39.895   54.940
+   0.217E+08     -3.607   39.894   54.853     -30.000  -30.000   54.139       10.7            10.7           39.894   54.941
+   0.218E+08     -3.609   39.892   54.854     -30.000  -30.000   54.139       10.7            10.7           39.892   54.943
+   0.219E+08     -3.610   39.891   54.856     -30.000  -30.000   54.139       10.7            10.7           39.891   54.944
+   0.220E+08     -3.611   39.890   54.857     -30.000  -30.000   54.139       10.6            10.6           39.890   54.945
+   0.221E+08     -3.612   39.889   54.859     -30.000  -30.000   54.139       10.6            10.6           39.889   54.946
+   0.222E+08     -3.613   39.887   54.860     -30.000  -30.000   54.139       10.6            10.6           39.887   54.947
+   0.223E+08     -3.615   39.886   54.862     -30.000  -30.000   54.139       10.6            10.6           39.886   54.949
+   0.224E+08     -3.616   39.885   54.863     -30.000  -30.000   54.139       10.5            10.5           39.885   54.950
+   0.225E+08     -3.617   39.884   54.865     -30.000  -30.000   54.139       10.5            10.5           39.884   54.951
+   0.226E+08     -3.618   39.882   54.866     -30.000  -30.000   54.139       10.5            10.5           39.883   54.952
+   0.227E+08     -3.620   39.881   54.867     -30.000  -30.000   54.139       10.5            10.4           39.881   54.953
+   0.228E+08     -3.621   39.880   54.869     -30.000  -30.000   54.139       10.4            10.4           39.880   54.954
+   0.229E+08     -3.622   39.879   54.870     -30.000  -30.000   54.139       10.4            10.4           39.879   54.956
+   0.230E+08     -3.623   39.878   54.872     -30.000  -30.000   54.139       10.4            10.4           39.878   54.957
+   0.231E+08     -3.624   39.876   54.873     -30.000  -30.000   54.139       10.4            10.4           39.876   54.958
+   0.232E+08     -3.626   39.875   54.874     -30.000  -30.000   54.139       10.3            10.3           39.875   54.959
+   0.233E+08     -3.627   39.874   54.876     -30.000  -30.000   54.139       10.3            10.3           39.874   54.960
+   0.234E+08     -3.628   39.873   54.877     -30.000  -30.000   54.139       10.3            10.3           39.873   54.961
+   0.235E+08     -3.629   39.872   54.878     -30.000  -30.000   54.139       10.3            10.3           39.872   54.962
+   0.236E+08     -3.630   39.871   54.880     -30.000  -30.000   54.139       10.2            10.2           39.871   54.964
+   0.237E+08     -3.632   39.869   54.881     -30.000  -30.000   54.139       10.2            10.2           39.869   54.965
+   0.238E+08     -3.632   39.868   54.882     -30.000  -30.000   54.139       10.2            10.2           39.868   54.966
+   0.239E+08     -3.634   39.867   54.884     -30.000  -30.000   54.139       10.2            10.2           39.867   54.967
+   0.240E+08     -3.635   39.866   54.885     -30.000  -30.000   54.139       10.2            10.1           39.866   54.968
+   0.241E+08     -3.636   39.865   54.886     -30.000  -30.000   54.139       10.1            10.1           39.865   54.969
+   0.242E+08     -3.637   39.863   54.888     -30.000  -30.000   54.139       10.1            10.1           39.863   54.970
+   0.243E+08     -3.638   39.863   54.889     -30.000  -30.000   54.139       10.1            10.1           39.863   54.971
+   0.244E+08     -3.640   39.861   54.890     -30.000  -30.000   54.139       10.1            10.1           39.861   54.972
+   0.245E+08     -3.641   39.860   54.892     -30.000  -30.000   54.139       10.0            10.0           39.860   54.973
+   0.246E+08     -3.642   39.859   54.893     -30.000  -30.000   54.139       10.0            10.0           39.859   54.974
+   0.247E+08     -3.643   39.858   54.894     -30.000  -30.000   54.139       10.0            10.0           39.858   54.975
+   0.248E+08     -3.644   39.857   54.895     -30.000  -30.000   54.139       10.0            10.0           39.857   54.976
+   0.249E+08     -3.645   39.856   54.897     -30.000  -30.000   54.139       10.0             9.9           39.856   54.977
+   0.250E+08     -3.646   39.855   54.898     -30.000  -30.000   54.139        9.9             9.9           39.855   54.978
+   0.251E+08     -3.647   39.854   54.899     -30.000  -30.000   54.139        9.9             9.9           39.854   54.979
+   0.252E+08     -3.648   39.852   54.900     -30.000  -30.000   54.139        9.9             9.9           39.852   54.980
+   0.253E+08     -3.650   39.851   54.902     -30.000  -30.000   54.139        9.9             9.9           39.851   54.981
+   0.254E+08     -3.651   39.850   54.903     -30.000  -30.000   54.139        9.9             9.8           39.850   54.983
+   0.255E+08     -3.652   39.849   54.904     -30.000  -30.000   54.139        9.8             9.8           39.849   54.984
+   0.256E+08     -3.653   39.848   54.905     -30.000  -30.000   54.139        9.8             9.8           39.848   54.985
+   0.257E+08     -3.654   39.847   54.906     -30.000  -30.000   54.139        9.8             9.8           39.847   54.986
+   0.258E+08     -3.655   39.846   54.908     -30.000  -30.000   54.139        9.8             9.8           39.846   54.987
+   0.259E+08     -3.656   39.845   54.909     -30.000  -30.000   54.139        9.8             9.7           39.845   54.987
+   0.260E+08     -3.657   39.844   54.910     -30.000  -30.000   54.139        9.7             9.7           39.844   54.988
+   0.261E+08     -3.658   39.843   54.911     -30.000  -30.000   54.139        9.7             9.7           39.843   54.989
+   0.262E+08     -3.659   39.842   54.912     -30.000  -30.000   54.139        9.7             9.7           39.842   54.990
+   0.263E+08     -3.660   39.841   54.913     -30.000  -30.000   54.139        9.7             9.7           39.841   54.991
+   0.264E+08     -3.661   39.839   54.915     -30.000  -30.000   54.139        9.7             9.7           39.840   54.992
+   0.265E+08     -3.662   39.839   54.916     -30.000  -30.000   54.139        9.6             9.6           39.839   54.993
+   0.266E+08     -3.663   39.838   54.917     -30.000  -30.000   54.139        9.6             9.6           39.838   54.994
+   0.267E+08     -3.665   39.836   54.918     -30.000  -30.000   54.139        9.6             9.6           39.836   54.995
+   0.268E+08     -3.665   39.836   54.919     -30.000  -30.000   54.139        9.6             9.6           39.836   54.996
+   0.269E+08     -3.667   39.834   54.920     -30.000  -30.000   54.139        9.6             9.6           39.834   54.997
+   0.270E+08     -3.667   39.833   54.921     -30.000  -30.000   54.139        9.6             9.5           39.834   54.998
+   0.271E+08     -3.669   39.832   54.923     -30.000  -30.000   54.139        9.5             9.5           39.832   54.999
+   0.272E+08     -3.669   39.831   54.924     -30.000  -30.000   54.139        9.5             9.5           39.832   55.000
+   0.273E+08     -3.671   39.830   54.925     -30.000  -30.000   54.139        9.5             9.5           39.830   55.001
+   0.274E+08     -3.671   39.829   54.926     -30.000  -30.000   54.139        9.5             9.5           39.829   55.002
+   0.275E+08     -3.673   39.828   54.927     -30.000  -30.000   54.139        9.5             9.5           39.828   55.003
+   0.276E+08     -3.674   39.827   54.928     -30.000  -30.000   54.139        9.4             9.4           39.827   55.004
+   0.277E+08     -3.674   39.827   54.929     -30.000  -30.000   54.139        9.4             9.4           39.827   55.005
+   0.278E+08     -3.676   39.825   54.930     -30.000  -30.000   54.139        9.4             9.4           39.825   55.005
+   0.279E+08     -3.677   39.824   54.931     -30.000  -30.000   54.139        9.4             9.4           39.824   55.006
+   0.280E+08     -3.677   39.823   54.932     -30.000  -30.000   54.139        9.4             9.4           39.824   55.007
+   0.281E+08     -3.679   39.822   54.933     -30.000  -30.000   54.139        9.4             9.3           39.822   55.008
+   0.282E+08     -3.679   39.821   54.934     -30.000  -30.000   54.139        9.3             9.3           39.821   55.009
+   0.283E+08     -3.681   39.820   54.935     -30.000  -30.000   54.139        9.3             9.3           39.820   55.010
+   0.284E+08     -3.681   39.820   54.937     -30.000  -30.000   54.139        9.3             9.3           39.820   55.011
+   0.285E+08     -3.683   39.818   54.938     -30.000  -30.000   54.139        9.3             9.3           39.818   55.012
+   0.286E+08     -3.684   39.817   54.939     -30.000  -30.000   54.139        9.3             9.3           39.817   55.013
+   0.287E+08     -3.684   39.817   54.940     -30.000  -30.000   54.139        9.3             9.3           39.817   55.013
+   0.288E+08     -3.685   39.815   54.941     -30.000  -30.000   54.139        9.2             9.2           39.815   55.014
+   0.289E+08     -3.686   39.815   54.942     -30.000  -30.000   54.139        9.2             9.2           39.815   55.015
+   0.290E+08     -3.688   39.813   54.943     -30.000  -30.000   54.139        9.2             9.2           39.813   55.016
+   0.291E+08     -3.688   39.813   54.944     -30.000  -30.000   54.139        9.2             9.2           39.813   55.017
+   0.292E+08     -3.689   39.812   54.945     -30.000  -30.000   54.139        9.2             9.2           39.812   55.018
+   0.293E+08     -3.690   39.811   54.946     -30.000  -30.000   54.139        9.2             9.2           39.811   55.019
+   0.294E+08     -3.691   39.810   54.947     -30.000  -30.000   54.139        9.1             9.1           39.810   55.019
+   0.295E+08     -3.692   39.809   54.948     -30.000  -30.000   54.139        9.1             9.1           39.809   55.020
+   0.296E+08     -3.693   39.808   54.949     -30.000  -30.000   54.139        9.1             9.1           39.808   55.021
+   0.297E+08     -3.694   39.807   54.950     -30.000  -30.000   54.139        9.1             9.1           39.807   55.022
+   0.298E+08     -3.695   39.806   54.951     -30.000  -30.000   54.139        9.1             9.1           39.806   55.023
+   0.299E+08     -3.696   39.805   54.952     -30.000  -30.000   54.139        9.1             9.1           39.805   55.024
+   0.300E+08     -3.697   39.804   54.953     -30.000  -30.000   54.139        9.1             9.0           39.804   55.024
+   0.301E+08     -3.697   39.803   54.954     -30.000  -30.000   54.139        9.0             9.0           39.804   55.025
+   0.302E+08     -3.699   39.802   54.955     -30.000  -30.000   54.139        9.0             9.0           39.802   55.026
+   0.303E+08     -3.700   39.801   54.956     -30.000  -30.000   54.139        9.0             9.0           39.801   55.027
+   0.304E+08     -3.685   39.816   54.957     -30.000  -30.000   54.139        9.0             9.0           39.816   55.028
+   0.305E+08     -3.678   39.822   54.958     -30.000  -30.000   54.139        9.0             9.0           39.823   55.029
+   0.306E+08     -3.679   39.822   54.959     -30.000  -30.000   54.139        9.0             9.0           39.822   55.029
+   0.307E+08     -3.679   39.821   54.960     -30.000  -30.000   54.139        9.0             8.9           39.821   55.030
+   0.308E+08     -3.680   39.821   54.961     -30.000  -30.000   54.139        8.9             8.9           39.821   55.031
+   0.309E+08     -3.681   39.820   54.962     -30.000  -30.000   54.139        8.9             8.9           39.820   55.032
+   0.310E+08     -3.681   39.819   54.963     -30.000  -30.000   54.139        8.9             8.9           39.819   55.033
+   0.311E+08     -3.682   39.819   54.964     -30.000  -30.000   54.139        8.9             8.9           39.819   55.034
+   0.312E+08     -3.683   39.818   54.964     -30.000  -30.000   54.139        8.9             8.9           39.818   55.034
+   0.313E+08     -3.683   39.818   54.965     -30.000  -30.000   54.139        8.9             8.9           39.818   55.035
+   0.314E+08     -3.684   39.817   54.966     -30.000  -30.000   54.139        8.8             8.8           39.817   55.036
+   0.315E+08     -3.685   39.816   54.967     -30.000  -30.000   54.139        8.8             8.8           39.816   55.037
+   0.316E+08     -3.685   39.816   54.968     -30.000  -30.000   54.139        8.8             8.8           39.816   55.038
+   0.317E+08     -3.686   39.815   54.969     -30.000  -30.000   54.139        8.8             8.8           39.815   55.039
+   0.318E+08     -3.686   39.814   54.970     -30.000  -30.000   54.139        8.8             8.8           39.814   55.039
+   0.319E+08     -3.687   39.814   54.971     -30.000  -30.000   54.139        8.8             8.8           39.814   55.040
+   0.320E+08     -3.688   39.813   54.972     -30.000  -30.000   54.139        8.8             8.8           39.813   55.041
+   0.321E+08     -3.688   39.813   54.973     -30.000  -30.000   54.139        8.7             8.7           39.813   55.042
+   0.322E+08     -3.689   39.812   54.974     -30.000  -30.000   54.139        8.7             8.7           39.812   55.043
+   0.323E+08     -3.690   39.811   54.975     -30.000  -30.000   54.139        8.7             8.7           39.811   55.043
+   0.324E+08     -3.690   39.811   54.976     -30.000  -30.000   54.139        8.7             8.7           39.811   55.044
+   0.325E+08     -3.691   39.810   54.977     -30.000  -30.000   54.139        8.7             8.7           39.810   55.045
+   0.326E+08     -3.691   39.810   54.978     -30.000  -30.000   54.139        8.7             8.7           39.810   55.046
+   0.327E+08     -3.692   39.809   54.979     -30.000  -30.000   54.139        8.7             8.7           39.809   55.047
+   0.328E+08     -3.692   39.809   54.980     -30.000  -30.000   54.139        8.7             8.6           39.809   55.047
+   0.329E+08     -3.693   39.808   54.981     -30.000  -30.000   54.139        8.6             8.6           39.808   55.048
+   0.330E+08     -3.693   39.808   54.982     -30.000  -30.000   54.139        8.6             8.6           39.808   55.049
+   0.331E+08     -3.694   39.807   54.982     -30.000  -30.000   54.139        8.6             8.6           39.807   55.050
+   0.332E+08     -3.694   39.807   54.983     -30.000  -30.000   54.139        8.6             8.6           39.807   55.051
+   0.333E+08     -3.695   39.806   54.984     -30.000  -30.000   54.139        8.6             8.6           39.806   55.051
+   0.334E+08     -3.696   39.805   54.985     -30.000  -30.000   54.139        8.6             8.6           39.805   55.052
+   0.335E+08     -3.696   39.805   54.986     -30.000  -30.000   54.139        8.6             8.5           39.805   55.053
+   0.336E+08     -3.697   39.804   54.987     -30.000  -30.000   54.139        8.5             8.5           39.804   55.054
+   0.337E+08     -3.697   39.803   54.988     -30.000  -30.000   54.139        8.5             8.5           39.804   55.054
+   0.338E+08     -3.698   39.803   54.989     -30.000  -30.000   54.139        8.5             8.5           39.803   55.055
+   0.339E+08     -3.699   39.802   54.990     -30.000  -30.000   54.139        8.5             8.5           39.802   55.056
+   0.340E+08     -3.699   39.802   54.991     -30.000  -30.000   54.139        8.5             8.5           39.802   55.057
+   0.341E+08     -3.700   39.801   54.991     -30.000  -30.000   54.139        8.5             8.5           39.801   55.058
+   0.342E+08     -3.701   39.800   54.992     -30.000  -30.000   54.139        8.5             8.5           39.800   55.058
+   0.343E+08     -3.701   39.800   54.993     -30.000  -30.000   54.139        8.5             8.4           39.800   55.059
+   0.344E+08     -3.702   39.799   54.994     -30.000  -30.000   54.139        8.4             8.4           39.799   55.060
+   0.345E+08     -3.702   39.799   54.995     -30.000  -30.000   54.139        8.4             8.4           39.799   55.061
+   0.346E+08     -3.703   39.798   54.996     -30.000  -30.000   54.139        8.4             8.4           39.798   55.061
+   0.347E+08     -3.703   39.797   54.997     -30.000  -30.000   54.139        8.4             8.4           39.798   55.062
+   0.348E+08     -3.703   39.798   54.998     -30.000  -30.000   54.139        8.4             8.4           39.798   55.063
+   0.349E+08     -3.705   39.796   54.998     -30.000  -30.000   54.139        8.4             8.4           39.796   55.064
+   0.350E+08     -3.705   39.796   54.999     -30.000  -30.000   54.139        8.4             8.4           39.796   55.064
+   0.351E+08     -3.706   39.795   55.000     -30.000  -30.000   54.139        8.4             8.3           39.795   55.065
+   0.352E+08     -3.706   39.795   55.001     -30.000  -30.000   54.139        8.3             8.3           39.795   55.066
+   0.353E+08     -3.707   39.794   55.002     -30.000  -30.000   54.139        8.3             8.3           39.794   55.066
+   0.354E+08     -3.707   39.794   55.003     -30.000  -30.000   54.139        8.3             8.3           39.794   55.067
+   0.355E+08     -3.708   39.793   55.004     -30.000  -30.000   54.139        8.3             8.3           39.793   55.068
+   0.356E+08     -3.708   39.793   55.004     -30.000  -30.000   54.139        8.3             8.3           39.793   55.069
+   0.357E+08     -3.709   39.792   55.005     -30.000  -30.000   54.139        8.3             8.3           39.792   55.069
+   0.358E+08     -3.709   39.792   55.006     -30.000  -30.000   54.139        8.3             8.3           39.792   55.070
+   0.359E+08     -3.710   39.791   55.007     -30.000  -30.000   54.139        8.3             8.3           39.791   55.071
+   0.360E+08     -3.710   39.791   55.008     -30.000  -30.000   54.139        8.2             8.2           39.791   55.072
+   0.361E+08     -3.711   39.790   55.009     -30.000  -30.000   54.139        8.2             8.2           39.790   55.072
+   0.362E+08     -3.712   39.789   55.009     -30.000  -30.000   54.139        8.2             8.2           39.789   55.073
+   0.363E+08     -3.712   39.789   55.010     -30.000  -30.000   54.139        8.2             8.2           39.789   55.074
+   0.364E+08     -3.713   39.788   55.011     -30.000  -30.000   54.139        8.2             8.2           39.788   55.074
+   0.365E+08     -3.713   39.788   55.012     -30.000  -30.000   54.139        8.2             8.2           39.788   55.075
+   0.366E+08     -3.713   39.788   55.013     -30.000  -30.000   54.139        8.2             8.2           39.788   55.076
+   0.367E+08     -3.714   39.787   55.013     -30.000  -30.000   54.139        8.2             8.2           39.787   55.076
+   0.368E+08     -3.715   39.786   55.014     -30.000  -30.000   54.139        8.2             8.1           39.786   55.077
+   0.369E+08     -3.715   39.786   55.015     -30.000  -30.000   54.139        8.1             8.1           39.786   55.078
+   0.370E+08     -3.716   39.785   55.016     -30.000  -30.000   54.139        8.1             8.1           39.785   55.079
+   0.371E+08     -3.716   39.785   55.017     -30.000  -30.000   54.139        8.1             8.1           39.785   55.079
+   0.372E+08     -3.717   39.784   55.017     -30.000  -30.000   54.139        8.1             8.1           39.784   55.080
+   0.373E+08     -3.717   39.784   55.018     -30.000  -30.000   54.139        8.1             8.1           39.784   55.081
+   0.374E+08     -3.718   39.783   55.019     -30.000  -30.000   54.139        8.1             8.1           39.783   55.081
+   0.375E+08     -3.718   39.783   55.020     -30.000  -30.000   54.139        8.1             8.1           39.783   55.082
+   0.376E+08     -3.719   39.782   55.021     -30.000  -30.000   54.139        8.1             8.1           39.782   55.083
+   0.377E+08     -3.719   39.781   55.021     -30.000  -30.000   54.139        8.1             8.0           39.781   55.083
+   0.378E+08     -3.720   39.781   55.022     -30.000  -30.000   54.139        8.0             8.0           39.781   55.084
+   0.379E+08     -3.720   39.781   55.023     -30.000  -30.000   54.139        8.0             8.0           39.781   55.085
+   0.380E+08     -3.721   39.780   55.024     -30.000  -30.000   54.139        8.0             8.0           39.780   55.085
+   0.381E+08     -3.722   39.779   55.025     -30.000  -30.000   54.139        8.0             8.0           39.779   55.086
+   0.382E+08     -3.922   39.579   55.025     -30.000  -30.000   54.139        8.0             0.0           39.579   55.087
+   0.383E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.745   55.087
+   0.384E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.746   55.087
+   0.385E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.744   55.087
+   0.386E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.736   55.087
+   0.387E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.738   55.087
+   0.388E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.741   55.087
+   0.389E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.739   55.087
+   0.390E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.740   55.087
+   0.391E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.739   55.087
+   0.392E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.731   55.087
+   0.393E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.727   55.087
+   0.394E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.731   55.087
+   0.395E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.728   55.087
+   0.396E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.731   55.087
+   0.397E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.723   55.087
+   0.398E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.724   55.087
+   0.399E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.724   55.087
+   0.400E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.719   55.087
+   0.401E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.719   55.087
+   0.402E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.716   55.087
+   0.403E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.717   55.087
+   0.404E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.716   55.087
+   0.405E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.717   55.087
+   0.406E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.716   55.087
+   0.407E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.705   55.087
+   0.408E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.703   55.087
+   0.409E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.712   55.087
+   0.410E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.710   55.087
+   0.411E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.702   55.087
+   0.412E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.702   55.087
+   0.413E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.697   55.087
+   0.414E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.697   55.087
+   0.415E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.701   55.087
+   0.416E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.696   55.087
+   0.417E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.696   55.087
+   0.418E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.693   55.087
+   0.419E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.693   55.087
+   0.420E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.692   55.087
+   0.421E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.687   55.087
+   0.422E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.684   55.087
+   0.423E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.685   55.087
+   0.424E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.683   55.087
+   0.425E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.685   55.087
+   0.426E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.682   55.087
+   0.427E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.682   55.087
+   0.428E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.679   55.087
+   0.429E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.679   55.087
+   0.430E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.679   55.087
+   0.431E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.680   55.087
+   0.432E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.675   55.087
+   0.433E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.675   55.087
+   0.434E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.669   55.087
+   0.435E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.673   55.087
+   0.436E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.669   55.087
+   0.437E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.661   55.087
+   0.438E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.666   55.087
+   0.439E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.670   55.087
+   0.440E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.668   55.087
+   0.441E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.661   55.087
+   0.442E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.663   55.087
+   0.443E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.668   55.087
+   0.444E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.667   55.087
+   0.445E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.659   55.087
+   0.446E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.657   55.087
+   0.447E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.651   55.087
+   0.448E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.668   55.087
+   0.449E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.666   55.087
+   0.450E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.663   55.087
+   0.451E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.656   55.087
+   0.452E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.646   55.087
+   0.453E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.650   55.087
+   0.454E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.647   55.087
+   0.455E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.658   55.087
+   0.456E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.658   55.087
+   0.457E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.645   55.087
+   0.458E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.645   55.087
+   0.459E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.648   55.087
+   0.460E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.652   55.087
+   0.461E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.652   55.087
+   0.462E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.646   55.087
+   0.463E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.653   55.087
+   0.464E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.643   55.087
+   0.465E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.649   55.087
+   0.466E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.649   55.087
+   0.467E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.650   55.087
+   0.468E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.650   55.087
+   0.469E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.639   55.087
+   0.470E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.643   55.087
+   0.471E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.645   55.087
+   0.472E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.644   55.087
+   0.473E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.643   55.087
+   0.474E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.631   55.087
+   0.475E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.625   55.087
+   0.476E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.644   55.087
+   0.477E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.639   55.087
+   0.478E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.626   55.087
+   0.479E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.633   55.087
+   0.480E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.635   55.087
+   0.481E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.627   55.087
+   0.482E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.628   55.087
+   0.483E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.614   55.087
+   0.484E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.618   55.087
+   0.485E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.606   55.087
+   0.486E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.606   55.087
+   0.487E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.606   55.087
+   0.488E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.605   55.087
+   0.489E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.596   55.087
+   0.490E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.597   55.087
+   0.491E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.598   55.087
+   0.492E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.578   55.087
+   0.493E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.575   55.087
+   0.494E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.576   55.087
+   0.495E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.587   55.087
+   0.496E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.574   55.087
+   0.497E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.573   55.087
+   0.498E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.572   55.087
+   0.499E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.571   55.087
+   0.500E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.562   55.087
+   0.501E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.561   55.087
+   0.502E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.555   55.087
+   0.503E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.554   55.087
+   0.504E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.562   55.087
+   0.505E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.549   55.087
+   0.506E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.547   55.087
+   0.507E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.547   55.087
+   0.508E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.546   55.087
+   0.509E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.534   55.087
+   0.510E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.528   55.087
+   0.511E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.543   55.087
+   0.512E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.526   55.087
+   0.513E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.528   55.087
+   0.514E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.527   55.087
+   0.515E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.526   55.087
+   0.516E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.522   55.087
+   0.517E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.509   55.087
+   0.518E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.507   55.087
+   0.519E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.506   55.087
+   0.520E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.501   55.087
+   0.521E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.500   55.087
+   0.522E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.494   55.087
+   0.523E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.484   55.087
+   0.524E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.492   55.087
+   0.525E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.496   55.087
+   0.526E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.481   55.087
+   0.527E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.466   55.087
+   0.528E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.480   55.087
+   0.529E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.478   55.087
+   0.530E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.478   55.087
+   0.531E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.459   55.087
+   0.532E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.462   55.087
+   0.533E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.475   55.087
+   0.534E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.453   55.087
+   0.535E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.456   55.087
+   0.536E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.451   55.087
+   0.537E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.453   55.087
+   0.538E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.453   55.087
+   0.539E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.452   55.087
+   0.540E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.444   55.087
+   0.541E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.438   55.087
+   0.542E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.434   55.087
+   0.543E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.433   55.087
+   0.544E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.432   55.087
+   0.545E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.431   55.087
+   0.546E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.430   55.087
+   0.547E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.429   55.087
+   0.548E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.409   55.087
+   0.549E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.408   55.087
+   0.550E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.417   55.087
+   0.551E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.406   55.087
+   0.552E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.405   55.087
+   0.553E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.405   55.087
+   0.554E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.404   55.087
+   0.555E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.403   55.087
+   0.556E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.398   55.087
+   0.557E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.401   55.087
+   0.558E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.396   55.087
+   0.559E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.400   55.087
+   0.560E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.388   55.087
+   0.561E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.381   55.087
+   0.562E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.376   55.087
+   0.563E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.375   55.087
+   0.564E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.371   55.087
+   0.565E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.357   55.087
+   0.566E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.356   55.087
+   0.567E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.352   55.087
+   0.568E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.354   55.087
+   0.569E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.354   55.087
+   0.570E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.345   55.087
+   0.571E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.344   55.087
+   0.572E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.343   55.087
+   0.573E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.350   55.087
+   0.574E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.326   55.087
+   0.575E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.345   55.087
+   0.576E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.347   55.087
+   0.577E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.347   55.087
+   0.578E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.328   55.087
+   0.579E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.327   55.087
+   0.580E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.321   55.087
+   0.581E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.326   55.087
+   0.582E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.328   55.087
+   0.583E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.324   55.087
+   0.584E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.315   55.087
+   0.585E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.322   55.087
+   0.586E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.304   55.087
+   0.587E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.303   55.087
+   0.588E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.288   55.087
+   0.589E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.280   55.087
+   0.590E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.276   55.087
+   0.591E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.275   55.087
+   0.592E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.274   55.087
+   0.593E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.273   55.087
+   0.594E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.272   55.087
+   0.595E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.273   55.087
+   0.596E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.272   55.087
+   0.597E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.272   55.087
+   0.598E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.250   55.087
+   0.599E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.267   55.087
+   0.600E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.248   55.087
+   0.601E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.248   55.087
+   0.602E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.247   55.087
+   0.603E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.246   55.087
+   0.604E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.245   55.087
+   0.605E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.244   55.087
+   0.606E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.236   55.087
+   0.607E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.215   55.087
+   0.608E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.198   55.087
+   0.609E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.198   55.087
+   0.610E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.213   55.087
+   0.611E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.212   55.087
+   0.612E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.195   55.087
+   0.613E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.194   55.087
+   0.614E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.188   55.087
+   0.615E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.187   55.087
+   0.616E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.183   55.087
+   0.617E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.182   55.087
+   0.618E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.173   55.087
+   0.619E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.172   55.087
+   0.620E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.166   55.087
+   0.621E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.168   55.087
+   0.622E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.169   55.087
+   0.623E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.167   55.087
+   0.624E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.125   55.087
+   0.625E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.162   55.087
+   0.626E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.151   55.087
+   0.627E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.126   55.087
+   0.628E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.118   55.087
+   0.629E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.080   55.087
+   0.630E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.079   55.087
+   0.631E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.078   55.087
+   0.632E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.078   55.087
+   0.633E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.077   55.087
+   0.634E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.076   55.087
+   0.635E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.101   55.087
+   0.636E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.075   55.087
+   0.637E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.074   55.087
+   0.638E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.074   55.087
+   0.639E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.094   55.087
+   0.640E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.040   55.087
+   0.641E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.039   55.087
+   0.642E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.064   55.087
+   0.643E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.037   55.087
+   0.644E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.062   55.087
+   0.645E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.024   55.087
+   0.646E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.023   55.087
+   0.647E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.022   55.087
+   0.648E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.034   55.087
+   0.649E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.988   55.087
+   0.650E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.036   55.087
+   0.651E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.987   55.087
+   0.652E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.986   55.087
+   0.653E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           35.003   55.087
+   0.654E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.985   55.087
+   0.655E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.984   55.087
+   0.656E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.983   55.087
+   0.657E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.982   55.087
+   0.658E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.982   55.087
+   0.659E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.981   55.087
+   0.660E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.997   55.087
+   0.661E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.951   55.087
+   0.662E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.895   55.087
+   0.663E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.894   55.087
+   0.664E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.962   55.087
+   0.665E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.849   55.087
+   0.666E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.930   55.087
+   0.667E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.848   55.087
+   0.668E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.890   55.087
+   0.669E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.886   55.087
+   0.670E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.889   55.087
+   0.671E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.888   55.087
+   0.672E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.888   55.087
+   0.673E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.844   55.087
+   0.674E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.796   55.087
+   0.675E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.842   55.087
+   0.676E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.842   55.087
+   0.677E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.791   55.087
+   0.678E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.793   55.087
+   0.679E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.789   55.087
+   0.680E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.839   55.087
+   0.681E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.791   55.087
+   0.682E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.726   55.087
+   0.683E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.725   55.087
+   0.684E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.786   55.087
+   0.685E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.724   55.087
+   0.686E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.723   55.087
+   0.687E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.722   55.087
+   0.688E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.722   55.087
+   0.689E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.640   55.087
+   0.690E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.639   55.087
+   0.691E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.639   55.087
+   0.692E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.642   55.087
+   0.693E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.637   55.087
+   0.694E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.534   55.087
+   0.695E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.614   55.087
+   0.696E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.613   55.087
+   0.697E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.638   55.087
+   0.698E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.634   55.087
+   0.699E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.633   55.087
+   0.700E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.633   55.087
+   0.701E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.533   55.087
+   0.702E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.631   55.087
+   0.703E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.627   55.087
+   0.704E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.527   55.087
+   0.705E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.362   55.087
+   0.706E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.523   55.087
+   0.707E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.427   55.087
+   0.708E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.360   55.087
+   0.709E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.425   55.087
+   0.710E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.263   55.087
+   0.711E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.150   55.087
+   0.712E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           34.059   55.087
+   0.713E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.412   55.087
+   0.714E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.411   55.087
+   0.715E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.410   55.087
+   0.716E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.408   55.087
+   0.717E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.407   55.087
+   0.718E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.406   55.087
+   0.719E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.405   55.087
+   0.720E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.404   55.087
+   0.721E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.402   55.087
+   0.722E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.401   55.087
+   0.723E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.401   55.087
+   0.724E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.399   55.087
+   0.725E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.398   55.087
+   0.726E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.397   55.087
+   0.727E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.395   55.087
+   0.728E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.394   55.087
+   0.729E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.393   55.087
+   0.730E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.392   55.087
+   0.731E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.391   55.087
+   0.732E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.390   55.087
+   0.733E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.389   55.087
+   0.734E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.388   55.087
+   0.735E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.387   55.087
+   0.736E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.386   55.087
+   0.737E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.384   55.087
+   0.738E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.383   55.087
+   0.739E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.382   55.087
+   0.740E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.381   55.087
+   0.741E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.379   55.087
+   0.742E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.379   55.087
+   0.743E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.377   55.087
+   0.744E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.377   55.087
+   0.745E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.375   55.087
+   0.746E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.375   55.087
+   0.747E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.373   55.087
+   0.748E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.372   55.087
+   0.749E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.370   55.087
+   0.750E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.370   55.087
+   0.751E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.369   55.087
+   0.752E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.368   55.087
+   0.753E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.367   55.087
+   0.754E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.365   55.087
+   0.755E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.364   55.087
+   0.756E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.363   55.087
+   0.757E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.362   55.087
+   0.758E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.361   55.087
+   0.759E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.360   55.087
+   0.760E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.359   55.087
+   0.761E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.358   55.087
+   0.762E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.357   55.087
+   0.763E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.356   55.087
+   0.764E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.354   55.087
+   0.765E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.354   55.087
+   0.766E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.352   55.087
+   0.767E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.351   55.087
+   0.768E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.350   55.087
+   0.769E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.349   55.087
+   0.770E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.348   55.087
+   0.771E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.347   55.087
+   0.772E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.346   55.087
+   0.773E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.345   55.087
+   0.774E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.344   55.087
+   0.775E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.343   55.087
+   0.776E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.342   55.087
+   0.777E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.341   55.087
+   0.778E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.340   55.087
+   0.779E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.339   55.087
+   0.780E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.337   55.087
+   0.781E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.337   55.087
+   0.782E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.336   55.087
+   0.783E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.335   55.087
+   0.784E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.334   55.087
+   0.785E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.333   55.087
+   0.786E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.331   55.087
+   0.787E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.330   55.087
+   0.788E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.329   55.087
+   0.789E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.329   55.087
+   0.790E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.327   55.087
+   0.791E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.326   55.087
+   0.792E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.325   55.087
+   0.793E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.324   55.087
+   0.794E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.323   55.087
+   0.795E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.322   55.087
+   0.796E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.321   55.087
+   0.797E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.320   55.087
+   0.798E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.319   55.087
+   0.799E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.318   55.087
+   0.800E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.317   55.087
+   0.801E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.316   55.087
+   0.802E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.315   55.087
+   0.803E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.314   55.087
+   0.804E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.313   55.087
+   0.805E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.312   55.087
+   0.806E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.311   55.087
+   0.807E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.310   55.087
+   0.808E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.309   55.087
+   0.809E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.308   55.087
+   0.810E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.307   55.087
+   0.811E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.306   55.087
+   0.812E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.306   55.087
+   0.813E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.304   55.087
+   0.814E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.303   55.087
+   0.815E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.302   55.087
+   0.816E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.301   55.087
+   0.817E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.300   55.087
+   0.818E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.300   55.087
+   0.819E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.299   55.087
+   0.820E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.297   55.087
+   0.821E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.296   55.087
+   0.822E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.296   55.087
+   0.823E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.294   55.087
+   0.824E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.294   55.087
+   0.825E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.292   55.087
+   0.826E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.292   55.087
+   0.827E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.291   55.087
+   0.828E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.290   55.087
+   0.829E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.289   55.087
+   0.830E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.288   55.087
+   0.831E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.287   55.087
+   0.832E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.286   55.087
+   0.833E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.285   55.087
+   0.834E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.284   55.087
+   0.835E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.283   55.087
+   0.836E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.282   55.087
+   0.837E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.282   55.087
+   0.838E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.280   55.087
+   0.839E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.279   55.087
+   0.840E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.278   55.087
+   0.841E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.277   55.087
+   0.842E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.276   55.087
+   0.843E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.275   55.087
+   0.844E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.274   55.087
+   0.845E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.274   55.087
+   0.846E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.273   55.087
+   0.847E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.272   55.087
+   0.848E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.270   55.087
+   0.849E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.270   55.087
+   0.850E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.269   55.087
+   0.851E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.268   55.087
+   0.852E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.267   55.087
+   0.853E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.266   55.087
+   0.854E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.266   55.087
+   0.855E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.264   55.087
+   0.856E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.264   55.087
+   0.857E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.263   55.087
+   0.858E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.262   55.087
+   0.859E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.260   55.087
+   0.860E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.260   55.087
+   0.861E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.259   55.087
+   0.862E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.258   55.087
+   0.863E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.257   55.087
+   0.864E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.256   55.087
+   0.865E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.255   55.087
+   0.866E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.254   55.087
+   0.867E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.253   55.087
+   0.868E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.253   55.087
+   0.869E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.251   55.087
+   0.870E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.251   55.087
+   0.871E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.250   55.087
+   0.872E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.249   55.087
+   0.873E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.248   55.087
+   0.874E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.247   55.087
+   0.875E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.246   55.087
+   0.876E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.245   55.087
+   0.877E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.244   55.087
+   0.878E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.244   55.087
+   0.879E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.243   55.087
+   0.880E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.242   55.087
+   0.881E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.241   55.087
+   0.882E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.240   55.087
+   0.883E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.239   55.087
+   0.884E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.238   55.087
+   0.885E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.237   55.087
+   0.886E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.237   55.087
+   0.887E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.236   55.087
+   0.888E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.235   55.087
+   0.889E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.234   55.087
+   0.890E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.233   55.087
+   0.891E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.232   55.087
+   0.892E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.232   55.087
+   0.893E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.231   55.087
+   0.894E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.230   55.087
+   0.895E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.229   55.087
+   0.896E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.228   55.087
+   0.897E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.227   55.087
+   0.898E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.226   55.087
+   0.899E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.225   55.087
+   0.900E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.225   55.087
+   0.901E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.224   55.087
+   0.902E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.223   55.087
+   0.903E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.222   55.087
+   0.904E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.221   55.087
+   0.905E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.221   55.087
+   0.906E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.220   55.087
+   0.907E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.219   55.087
+   0.908E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.218   55.087
+   0.909E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.218   55.087
+   0.910E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.217   55.087
+   0.911E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.216   55.087
+   0.912E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.216   55.087
+   0.913E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.215   55.087
+   0.914E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.215   55.087
+   0.915E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.214   55.087
+   0.916E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.213   55.087
+   0.917E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.212   55.087
+   0.918E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.212   55.087
+   0.919E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.211   55.087
+   0.920E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.210   55.087
+   0.921E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.210   55.087
+   0.922E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.209   55.087
+   0.923E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.208   55.087
+   0.924E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.208   55.087
+   0.925E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.207   55.087
+   0.926E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.206   55.087
+   0.927E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.206   55.087
+   0.928E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.205   55.087
+   0.929E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.204   55.087
+   0.930E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.204   55.087
+   0.931E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.203   55.087
+   0.932E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.202   55.087
+   0.933E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.202   55.087
+   0.934E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.201   55.087
+   0.935E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.201   55.087
+   0.936E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.200   55.087
+   0.937E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.199   55.087
+   0.938E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.198   55.087
+   0.939E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.198   55.087
+   0.940E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.197   55.087
+   0.941E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.197   55.087
+   0.942E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.196   55.087
+   0.943E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.195   55.087
+   0.944E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.195   55.087
+   0.945E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.194   55.087
+   0.946E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.193   55.087
+   0.947E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.193   55.087
+   0.948E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.192   55.087
+   0.949E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.191   55.087
+   0.950E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.191   55.087
+   0.951E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.190   55.087
+   0.952E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.189   55.087
+   0.953E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.189   55.087
+   0.954E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.188   55.087
+   0.955E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.187   55.087
+   0.956E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.187   55.087
+   0.957E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.186   55.087
+   0.958E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.185   55.087
+   0.959E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.185   55.087
+   0.960E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.184   55.087
+   0.961E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.183   55.087
+   0.962E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.183   55.087
+   0.963E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.182   55.087
+   0.964E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.181   55.087
+   0.965E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.181   55.087
+   0.966E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.180   55.087
+   0.967E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.179   55.087
+   0.968E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.179   55.087
+   0.969E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.178   55.087
+   0.970E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.177   55.087
+   0.971E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.177   55.087
+   0.972E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.176   55.087
+   0.973E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.175   55.087
+   0.974E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.175   55.087
+   0.975E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.174   55.087
+   0.976E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.173   55.087
+   0.977E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.173   55.087
+   0.978E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.172   55.087
+   0.979E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.172   55.087
+   0.980E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.171   55.087
+   0.981E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.170   55.087
+   0.982E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.169   55.087
+   0.983E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.169   55.087
+   0.984E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.168   55.087
+   0.985E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.167   55.087
+   0.986E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.167   55.087
+   0.987E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.166   55.087
+   0.988E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.166   55.087
+   0.989E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.165   55.087
+   0.990E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.164   55.087
+   0.991E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.164   55.087
+   0.992E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.163   55.087
+   0.993E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.162   55.087
+   0.994E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.162   55.087
+   0.995E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.161   55.087
+   0.996E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.160   55.087
+   0.997E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.159   55.087
+   0.998E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.159   55.087
+   0.999E+08    -30.000  -30.000   55.025     -30.000  -30.000   54.139        0.0             0.0           33.158   55.087
diff --git a/src/particles/supernova.h b/src/particles/supernova.h
new file mode 100644
index 000000000..409b2e56b
--- /dev/null
+++ b/src/particles/supernova.h
@@ -0,0 +1,32 @@
+#pragma once
+#if defined(PARTICLES_GPU) && defined(SUPERNOVA)
+
+  #include "../analysis/feedback_analysis.h"
+  #include "../global/global.h"
+  #ifdef O_HIP
+    #include <hiprand.h>
+    #include <hiprand_kernel.h>
+  #else
+    #include <curand.h>
+    #include <curand_kernel.h>
+  #endif  // O_HIP
+
+namespace supernova
+{
+const int SN = 0, RESOLVED = 1, NOT_RESOLVED = 2, ENERGY = 3, MOMENTUM = 4, UNRES_ENERGY = 5;
+
+// supernova rate: 1SN / 100 solar masses per 36 Myr
+static const Real DEFAULT_SNR   = 2.8e-7;
+static const Real ENERGY_PER_SN = 1e51 / MASS_UNIT * TIME_UNIT * TIME_UNIT / LENGTH_UNIT / LENGTH_UNIT;
+static const Real MASS_PER_SN   = 10.0;  // 10 solarMasses per SN
+static const Real FINAL_MOMENTUM =
+    2.8e5 / LENGTH_UNIT * 1e5 * TIME_UNIT;  // 2.8e5 M_s km/s * n_0^{-0.17} -> eq.(34) Kim & Ostriker (2015)
+static const Real MU               = 0.6;
+static const Real R_SH             = 0.0302;  // 30.2 pc * n_0^{-0.46} -> eq.(31) Kim & Ostriker (2015)
+static const Real DEFAULT_SN_END   = 40000;   // default value for when SNe stop (40 Myr)
+static const Real DEFAULT_SN_START = 4000;    // default value for when SNe start (4 Myr)
+
+void initState(struct Parameters* P, part_int_t n_local, Real allocation_factor = 1);
+Real Cluster_Feedback(Grid3D& G, FeedbackAnalysis& sn_analysis);
+}  // namespace supernova
+#endif  // PARTICLES_GPU && SUPERNOVA
diff --git a/src/reconstruction/pcm_cuda.cu b/src/reconstruction/pcm_cuda.cu
index 1964ddedf..e7264ca54 100644
--- a/src/reconstruction/pcm_cuda.cu
+++ b/src/reconstruction/pcm_cuda.cu
@@ -1,494 +1,424 @@
 /*! \file pcm_cuda.cu
  *  \brief Definitions of the piecewise constant reconstruction functions */
-#ifdef CUDA
 
-#include "../utils/gpu.hpp"
 #include <math.h>
+
 #include "../global/global.h"
 #include "../global/global_cuda.h"
 #include "../reconstruction/pcm_cuda.h"
+#include "../utils/cuda_utilities.h"
+#include "../utils/gpu.hpp"
+#include "../utils/mhd_utilities.h"
 
-
-__global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int n_cells, int n_ghost, Real gamma, int n_fields)
+__global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int n_cells,
+                                      int n_ghost, Real gamma, int n_fields)
 {
-
   // declare conserved variables for each stencil
   // these will be placed into registers for each thread
   Real d, mx, my, mz, E;
 
-  #ifdef DE
+#ifdef DE
   Real ge;
-  #endif
+#endif  // DE
 
-  #ifdef SCALAR
+#ifdef SCALAR
   Real scalar[NSCALARS];
-  #endif
+#endif  // SCALAR
 
   // get a global thread ID
-  int xid = threadIdx.x + blockIdx.x*blockDim.x;
+  int xid = threadIdx.x + blockIdx.x * blockDim.x;
   int id;
 
-
   // threads corresponding to real cells plus one ghost cell do the calculation
-  if (xid < n_cells-1)
-  {
+  if (xid < n_cells - 1) {
     // retrieve appropriate conserved variables
     id = xid;
-    d  = dev_conserved[            id];
-    mx = dev_conserved[  n_cells + id];
-    my = dev_conserved[2*n_cells + id];
-    mz = dev_conserved[3*n_cells + id];
-    E  = dev_conserved[4*n_cells + id];
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar[i] = dev_conserved[(5+i)*n_cells + id];
+    d  = dev_conserved[id];
+    mx = dev_conserved[n_cells + id];
+    my = dev_conserved[2 * n_cells + id];
+    mz = dev_conserved[3 * n_cells + id];
+    E  = dev_conserved[4 * n_cells + id];
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      scalar[i] = dev_conserved[(5 + i) * n_cells + id];
     }
-    #endif
-    #ifdef DE
-    ge = dev_conserved[(n_fields-1)*n_cells + id];
-    #endif
+#endif  // SCALAR
+#ifdef DE
+    ge = dev_conserved[(n_fields - 1) * n_cells + id];
+#endif  // DE
 
     // send values back from the kernel
-    dev_bounds_L[            id] = d;
-    dev_bounds_L[  n_cells + id] = mx;
-    dev_bounds_L[2*n_cells + id] = my;
-    dev_bounds_L[3*n_cells + id] = mz;
-    dev_bounds_L[4*n_cells + id] = E;
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_bounds_L[(5+i)*n_cells + id] = scalar[i];
+    dev_bounds_L[id]               = d;
+    dev_bounds_L[n_cells + id]     = mx;
+    dev_bounds_L[2 * n_cells + id] = my;
+    dev_bounds_L[3 * n_cells + id] = mz;
+    dev_bounds_L[4 * n_cells + id] = E;
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dev_bounds_L[(5 + i) * n_cells + id] = scalar[i];
     }
-    #endif
-    #ifdef DE
-    dev_bounds_L[(n_fields-1)*n_cells + id] = ge;
-    #endif
+#endif  // SCALAR
+#ifdef DE
+    dev_bounds_L[(n_fields - 1) * n_cells + id] = ge;
+#endif  // DE
 
     // retrieve appropriate conserved variables
-    id = xid+1;
-    d  = dev_conserved[            id];
-    mx = dev_conserved[  n_cells + id];
-    my = dev_conserved[2*n_cells + id];
-    mz = dev_conserved[3*n_cells + id];
-    E  = dev_conserved[4*n_cells + id];
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar[i] = dev_conserved[(5+i)*n_cells + id];
+    id = xid + 1;
+    d  = dev_conserved[id];
+    mx = dev_conserved[n_cells + id];
+    my = dev_conserved[2 * n_cells + id];
+    mz = dev_conserved[3 * n_cells + id];
+    E  = dev_conserved[4 * n_cells + id];
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      scalar[i] = dev_conserved[(5 + i) * n_cells + id];
     }
-    #endif
-    #ifdef DE
-    ge = dev_conserved[(n_fields-1)*n_cells + id];
-    #endif
+#endif  // SCALAR
+#ifdef DE
+    ge = dev_conserved[(n_fields - 1) * n_cells + id];
+#endif  // DE
 
     // send values back from the kernel
-    id = xid;
-    dev_bounds_R[            id] = d;
-    dev_bounds_R[  n_cells + id] = mx;
-    dev_bounds_R[2*n_cells + id] = my;
-    dev_bounds_R[3*n_cells + id] = mz;
-    dev_bounds_R[4*n_cells + id] = E;
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_bounds_R[(5+i)*n_cells + id] = scalar[i];
+    id                             = xid;
+    dev_bounds_R[id]               = d;
+    dev_bounds_R[n_cells + id]     = mx;
+    dev_bounds_R[2 * n_cells + id] = my;
+    dev_bounds_R[3 * n_cells + id] = mz;
+    dev_bounds_R[4 * n_cells + id] = E;
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dev_bounds_R[(5 + i) * n_cells + id] = scalar[i];
     }
-    #endif
-    #ifdef DE
-    dev_bounds_R[(n_fields-1)*n_cells + id] = ge;
-    #endif
-
+#endif  // SCALAR
+#ifdef DE
+    dev_bounds_R[(n_fields - 1) * n_cells + id] = ge;
+#endif  // DE
   }
-
 }
 
-
-__global__ void PCM_Reconstruction_2D(Real *dev_conserved, Real *dev_bounds_Lx, Real *dev_bounds_Rx, Real *dev_bounds_Ly, Real *dev_bounds_Ry, int nx, int ny, int n_ghost, Real gamma, int n_fields)
+__global__ void PCM_Reconstruction_2D(Real *dev_conserved, Real *dev_bounds_Lx, Real *dev_bounds_Rx,
+                                      Real *dev_bounds_Ly, Real *dev_bounds_Ry, int nx, int ny, int n_ghost, Real gamma,
+                                      int n_fields)
 {
-
   // declare conserved variables for each stencil
   // these will be placed into registers for each thread
   Real d, mx, my, mz, E;
-  #ifdef DE
+#ifdef DE
   Real ge;
-  #endif
-  #ifdef SCALAR
+#endif  // DE
+#ifdef SCALAR
   Real scalar[NSCALARS];
-  #endif
+#endif  // SCALAR
 
-  int n_cells = nx*ny;
+  int n_cells = nx * ny;
 
   // get a thread ID
-  int blockId = blockIdx.x + blockIdx.y*gridDim.x;
-  int tid = threadIdx.x + blockId * blockDim.x;
-  int yid = tid / nx;
-  int xid = tid - yid*nx;
+  int blockId = blockIdx.x + blockIdx.y * gridDim.x;
+  int tid     = threadIdx.x + blockId * blockDim.x;
+  int yid     = tid / nx;
+  int xid     = tid - yid * nx;
   int id;
 
   // threads corresponding to real cells plus one ghost cell do the calculation
   // x direction
-  if (xid < nx-1 && yid < ny)
-  {
+  if (xid < nx - 1 && yid < ny) {
     // retrieve appropriate conserved variables
-    id = xid + yid*nx;
-    d  = dev_conserved[            id];
-    mx = dev_conserved[  n_cells + id];
-    my = dev_conserved[2*n_cells + id];
-    mz = dev_conserved[3*n_cells + id];
-    E  = dev_conserved[4*n_cells + id];
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar[i] = dev_conserved[(5+i)*n_cells + id];
+    id = xid + yid * nx;
+    d  = dev_conserved[id];
+    mx = dev_conserved[n_cells + id];
+    my = dev_conserved[2 * n_cells + id];
+    mz = dev_conserved[3 * n_cells + id];
+    E  = dev_conserved[4 * n_cells + id];
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      scalar[i] = dev_conserved[(5 + i) * n_cells + id];
     }
-    #endif
-    #ifdef DE
-    ge = dev_conserved[(n_fields-1)*n_cells + id];
-    #endif
+#endif  // SCALAR
+#ifdef DE
+    ge = dev_conserved[(n_fields - 1) * n_cells + id];
+#endif  // DE
 
     // send values back from the kernel
-    dev_bounds_Lx[            id] = d;
-    dev_bounds_Lx[  n_cells + id] = mx;
-    dev_bounds_Lx[2*n_cells + id] = my;
-    dev_bounds_Lx[3*n_cells + id] = mz;
-    dev_bounds_Lx[4*n_cells + id] = E;
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_bounds_Lx[(5+i)*n_cells + id] = scalar[i];
+    dev_bounds_Lx[id]               = d;
+    dev_bounds_Lx[n_cells + id]     = mx;
+    dev_bounds_Lx[2 * n_cells + id] = my;
+    dev_bounds_Lx[3 * n_cells + id] = mz;
+    dev_bounds_Lx[4 * n_cells + id] = E;
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dev_bounds_Lx[(5 + i) * n_cells + id] = scalar[i];
     }
-    #endif
-    #ifdef DE
-    dev_bounds_Lx[(n_fields-1)*n_cells + id] = ge;
-    #endif
+#endif  // SCALAR
+#ifdef DE
+    dev_bounds_Lx[(n_fields - 1) * n_cells + id] = ge;
+#endif  // DE
 
     // retrieve appropriate conserved variables
-    id = xid+1 + yid*nx;
-    d  = dev_conserved[            id];
-    mx = dev_conserved[  n_cells + id];
-    my = dev_conserved[2*n_cells + id];
-    mz = dev_conserved[3*n_cells + id];
-    E  = dev_conserved[4*n_cells + id];
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar[i] = dev_conserved[(5+i)*n_cells + id];
+    id = xid + 1 + yid * nx;
+    d  = dev_conserved[id];
+    mx = dev_conserved[n_cells + id];
+    my = dev_conserved[2 * n_cells + id];
+    mz = dev_conserved[3 * n_cells + id];
+    E  = dev_conserved[4 * n_cells + id];
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      scalar[i] = dev_conserved[(5 + i) * n_cells + id];
     }
-    #endif
-    #ifdef DE
-    ge = dev_conserved[(n_fields-1)*n_cells + id];
-    #endif
+#endif  // SCALAR
+#ifdef DE
+    ge = dev_conserved[(n_fields - 1) * n_cells + id];
+#endif  // DE
 
     // send values back from the kernel
-    id = xid + yid*nx;
-    dev_bounds_Rx[            id] = d;
-    dev_bounds_Rx[  n_cells + id] = mx;
-    dev_bounds_Rx[2*n_cells + id] = my;
-    dev_bounds_Rx[3*n_cells + id] = mz;
-    dev_bounds_Rx[4*n_cells + id] = E;
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_bounds_Rx[(5+i)*n_cells + id] = scalar[i];
+    id                              = xid + yid * nx;
+    dev_bounds_Rx[id]               = d;
+    dev_bounds_Rx[n_cells + id]     = mx;
+    dev_bounds_Rx[2 * n_cells + id] = my;
+    dev_bounds_Rx[3 * n_cells + id] = mz;
+    dev_bounds_Rx[4 * n_cells + id] = E;
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dev_bounds_Rx[(5 + i) * n_cells + id] = scalar[i];
     }
-    #endif
-    #ifdef DE
-    dev_bounds_Rx[(n_fields-1)*n_cells + id] = ge;
-    #endif
+#endif  // SCALAR
+#ifdef DE
+    dev_bounds_Rx[(n_fields - 1) * n_cells + id] = ge;
+#endif  // DE
   }
 
   // y direction
-  if (xid < nx && yid < ny-1)
-  {
+  if (xid < nx && yid < ny - 1) {
     // retrieve appropriate conserved variables
-    id = xid + yid*nx;
-    d  = dev_conserved[            id];
-    mx = dev_conserved[  n_cells + id];
-    my = dev_conserved[2*n_cells + id];
-    mz = dev_conserved[3*n_cells + id];
-    E  = dev_conserved[4*n_cells + id];
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar[i] = dev_conserved[(5+i)*n_cells + id];
+    id = xid + yid * nx;
+    d  = dev_conserved[id];
+    mx = dev_conserved[n_cells + id];
+    my = dev_conserved[2 * n_cells + id];
+    mz = dev_conserved[3 * n_cells + id];
+    E  = dev_conserved[4 * n_cells + id];
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      scalar[i] = dev_conserved[(5 + i) * n_cells + id];
     }
-    #endif
-    #ifdef DE
-    ge = dev_conserved[(n_fields-1)*n_cells + id];
-    #endif
+#endif  // SCALAR
+#ifdef DE
+    ge = dev_conserved[(n_fields - 1) * n_cells + id];
+#endif  // DE
 
     // send values back from the kernel
-    dev_bounds_Ly[            id] = d;
-    dev_bounds_Ly[  n_cells + id] = mx;
-    dev_bounds_Ly[2*n_cells + id] = my;
-    dev_bounds_Ly[3*n_cells + id] = mz;
-    dev_bounds_Ly[4*n_cells + id] = E;
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_bounds_Ly[(5+i)*n_cells + id] = scalar[i];
+    dev_bounds_Ly[id]               = d;
+    dev_bounds_Ly[n_cells + id]     = mx;
+    dev_bounds_Ly[2 * n_cells + id] = my;
+    dev_bounds_Ly[3 * n_cells + id] = mz;
+    dev_bounds_Ly[4 * n_cells + id] = E;
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dev_bounds_Ly[(5 + i) * n_cells + id] = scalar[i];
     }
-    #endif
-    #ifdef DE
-    dev_bounds_Ly[(n_fields-1)*n_cells + id] = ge;
-    #endif
+#endif  // SCALAR
+#ifdef DE
+    dev_bounds_Ly[(n_fields - 1) * n_cells + id] = ge;
+#endif  // DE
 
     // retrieve appropriate conserved variables
-    id = xid + (yid+1)*nx;
-    d  = dev_conserved[            id];
-    mx = dev_conserved[  n_cells + id];
-    my = dev_conserved[2*n_cells + id];
-    mz = dev_conserved[3*n_cells + id];
-    E  = dev_conserved[4*n_cells + id];
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar[i] = dev_conserved[(5+i)*n_cells + id];
+    id = xid + (yid + 1) * nx;
+    d  = dev_conserved[id];
+    mx = dev_conserved[n_cells + id];
+    my = dev_conserved[2 * n_cells + id];
+    mz = dev_conserved[3 * n_cells + id];
+    E  = dev_conserved[4 * n_cells + id];
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      scalar[i] = dev_conserved[(5 + i) * n_cells + id];
     }
-    #endif
-    #ifdef DE
-    ge = dev_conserved[(n_fields-1)*n_cells + id];
-    #endif
+#endif  // SCALAR
+#ifdef DE
+    ge = dev_conserved[(n_fields - 1) * n_cells + id];
+#endif  // DE
 
     // send values back from the kernel
-    id = xid + yid*nx;
-    dev_bounds_Ry[            id] = d;
-    dev_bounds_Ry[  n_cells + id] = mx;
-    dev_bounds_Ry[2*n_cells + id] = my;
-    dev_bounds_Ry[3*n_cells + id] = mz;
-    dev_bounds_Ry[4*n_cells + id] = E;
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_bounds_Ry[(5+i)*n_cells + id] = scalar[i];
+    id                              = xid + yid * nx;
+    dev_bounds_Ry[id]               = d;
+    dev_bounds_Ry[n_cells + id]     = mx;
+    dev_bounds_Ry[2 * n_cells + id] = my;
+    dev_bounds_Ry[3 * n_cells + id] = mz;
+    dev_bounds_Ry[4 * n_cells + id] = E;
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dev_bounds_Ry[(5 + i) * n_cells + id] = scalar[i];
     }
-    #endif
-    #ifdef DE
-    dev_bounds_Ry[(n_fields-1)*n_cells + id] = ge;
-    #endif
+#endif  // SCALAR
+#ifdef DE
+    dev_bounds_Ry[(n_fields - 1) * n_cells + id] = ge;
+#endif  // DE
   }
-
 }
 
-
-__global__ void PCM_Reconstruction_3D(Real *dev_conserved,
-                                      Real *dev_bounds_Lx, Real *dev_bounds_Rx,
-                                      Real *dev_bounds_Ly, Real *dev_bounds_Ry,
-                                      Real *dev_bounds_Lz, Real *dev_bounds_Rz,
-                                      int nx, int ny, int nz, int n_ghost, Real gamma, int n_fields)
+__global__ void PCM_Reconstruction_3D(Real *dev_conserved, Real *dev_bounds_Lx, Real *dev_bounds_Rx,
+                                      Real *dev_bounds_Ly, Real *dev_bounds_Ry, Real *dev_bounds_Lz,
+                                      Real *dev_bounds_Rz, int nx, int ny, int nz, int n_ghost, Real gamma,
+                                      int n_fields)
 {
-
-  // declare conserved variables for each stencil
-  // these will be placed into registers for each thread
-  Real d, mx, my, mz, E;
-  #ifdef DE
-  Real ge;
-  #endif
-  #ifdef SCALAR
+// declare conserved variables for each stencil
+// these will be placed into registers for each thread
+#ifdef SCALAR
   Real scalar[NSCALARS];
-  #endif
+#endif  // SCALAR
 
-
-  int n_cells = nx*ny*nz;
+  int const n_cells = nx * ny * nz;
 
   // get a thread ID
-  int tid = threadIdx.x + blockIdx.x * blockDim.x;
-  int zid = tid / (nx*ny);
-  int yid = (tid - zid*nx*ny) / nx;
-  int xid = tid - zid*nx*ny - yid*nx;
-  int id = xid + yid*nx + zid*nx*ny;
-
-  // x direction
-  if (xid < nx-1 && yid < ny && zid < nz)
-  {
-    // retrieve appropriate conserved variables
-    id = xid + yid*nx + zid*nx*ny;
-    d  = dev_conserved[            id];
-    mx = dev_conserved[  n_cells + id];
-    my = dev_conserved[2*n_cells + id];
-    mz = dev_conserved[3*n_cells + id];
-    E  = dev_conserved[4*n_cells + id];
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar[i] = dev_conserved[(5+i)*n_cells + id];
+  int id = threadIdx.x + blockIdx.x * blockDim.x;
+  int xid, yid, zid;
+  cuda_utilities::compute3DIndices(id, nx, ny, xid, yid, zid);
+
+  // Guard to avoid out of bounds threads
+  if (xid < nx && yid < ny && zid < nz) {
+    // ========================================
+    // Retrieve appropriate conserved variables
+    // ========================================
+    Real const d  = dev_conserved[id];
+    Real const mx = dev_conserved[n_cells + id];
+    Real const my = dev_conserved[2 * n_cells + id];
+    Real const mz = dev_conserved[3 * n_cells + id];
+    Real const E  = dev_conserved[4 * n_cells + id];
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      scalar[i] = dev_conserved[(5 + i) * n_cells + id];
     }
-    #endif
-    #ifdef DE
-    ge = dev_conserved[(n_fields-1)*n_cells + id];
-    #endif
-
-    // send values back from the kernel
-    dev_bounds_Lx[            id] = d;
-    dev_bounds_Lx[  n_cells + id] = mx;
-    dev_bounds_Lx[2*n_cells + id] = my;
-    dev_bounds_Lx[3*n_cells + id] = mz;
-    dev_bounds_Lx[4*n_cells + id] = E;
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_bounds_Lx[(5+i)*n_cells + id] = scalar[i];
+#endif  // SCALAR
+#ifdef MHD
+    auto const [cellCenteredBx, cellCenteredBy, cellCenteredBz] =
+        mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny);
+#endif  // MHD
+#ifdef DE
+    Real const ge = dev_conserved[(n_fields - 1) * n_cells + id];
+#endif  // DE
+
+    // ================================
+    // Send values back from the kernel
+    // ================================
+
+    // Send the x+1/2 Left interface
+    dev_bounds_Lx[id]               = d;
+    dev_bounds_Lx[n_cells + id]     = mx;
+    dev_bounds_Lx[2 * n_cells + id] = my;
+    dev_bounds_Lx[3 * n_cells + id] = mz;
+    dev_bounds_Lx[4 * n_cells + id] = E;
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dev_bounds_Lx[(5 + i) * n_cells + id] = scalar[i];
     }
-    #endif
-    #ifdef DE
-    dev_bounds_Lx[(n_fields-1)*n_cells + id] = ge;
-    #endif
-
-    // retrieve appropriate conserved variables
-    id = xid+1 + yid*nx + zid*nx*ny;
-    d  = dev_conserved[            id];
-    mx = dev_conserved[  n_cells + id];
-    my = dev_conserved[2*n_cells + id];
-    mz = dev_conserved[3*n_cells + id];
-    E  = dev_conserved[4*n_cells + id];
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar[i] = dev_conserved[(5+i)*n_cells + id];
+#endif  // SCALAR
+#ifdef MHD
+    dev_bounds_Lx[(grid_enum::Q_x_magnetic_y)*n_cells + id] = cellCenteredBy;
+    dev_bounds_Lx[(grid_enum::Q_x_magnetic_z)*n_cells + id] = cellCenteredBz;
+#endif  // MHD
+#ifdef DE
+    dev_bounds_Lx[(n_fields - 1) * n_cells + id] = ge;
+#endif  // DE
+
+    // Send the y+1/2 Left interface
+    dev_bounds_Ly[id]               = d;
+    dev_bounds_Ly[n_cells + id]     = mx;
+    dev_bounds_Ly[2 * n_cells + id] = my;
+    dev_bounds_Ly[3 * n_cells + id] = mz;
+    dev_bounds_Ly[4 * n_cells + id] = E;
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dev_bounds_Ly[(5 + i) * n_cells + id] = scalar[i];
     }
-    #endif
-    #ifdef DE
-    ge = dev_conserved[(n_fields-1)*n_cells + id];
-    #endif
-
-    // send values back from the kernel
-    id = xid + yid*nx + zid*nx*ny;
-    dev_bounds_Rx[            id] = d;
-    dev_bounds_Rx[  n_cells + id] = mx;
-    dev_bounds_Rx[2*n_cells + id] = my;
-    dev_bounds_Rx[3*n_cells + id] = mz;
-    dev_bounds_Rx[4*n_cells + id] = E;
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_bounds_Rx[(5+i)*n_cells + id] = scalar[i];
-    }
-    #endif
-    #ifdef DE
-    dev_bounds_Rx[(n_fields-1)*n_cells + id] = ge;
-    #endif
-  }
-
-  // y direction
-  if (xid < nx && yid < ny-1 && zid < nz)
-  {
-    // retrieve appropriate conserved variables
-    id = xid + yid*nx + zid*nx*ny;
-    d  = dev_conserved[            id];
-    mx = dev_conserved[  n_cells + id];
-    my = dev_conserved[2*n_cells + id];
-    mz = dev_conserved[3*n_cells + id];
-    E  = dev_conserved[4*n_cells + id];
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar[i] = dev_conserved[(5+i)*n_cells + id];
-    }
-    #endif
-    #ifdef DE
-    ge = dev_conserved[(n_fields-1)*n_cells + id];
-    #endif
-
-    // send values back from the kernel
-    dev_bounds_Ly[            id] = d;
-    dev_bounds_Ly[  n_cells + id] = mx;
-    dev_bounds_Ly[2*n_cells + id] = my;
-    dev_bounds_Ly[3*n_cells + id] = mz;
-    dev_bounds_Ly[4*n_cells + id] = E;
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_bounds_Ly[(5+i)*n_cells + id] = scalar[i];
-    }
-    #endif
-    #ifdef DE
-    dev_bounds_Ly[(n_fields-1)*n_cells + id] = ge;
-    #endif
-
-    // retrieve appropriate conserved variables
-    id = xid + (yid+1)*nx + zid*nx*ny;
-    d  = dev_conserved[            id];
-    mx = dev_conserved[  n_cells + id];
-    my = dev_conserved[2*n_cells + id];
-    mz = dev_conserved[3*n_cells + id];
-    E  = dev_conserved[4*n_cells + id];
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar[i] = dev_conserved[(5+i)*n_cells + id];
-    }
-    #endif
-    #ifdef DE
-    ge = dev_conserved[(n_fields-1)*n_cells + id];
-    #endif
-
-    // send values back from the kernel
-    id = xid + yid*nx + zid*nx*ny;
-    dev_bounds_Ry[            id] = d;
-    dev_bounds_Ry[  n_cells + id] = mx;
-    dev_bounds_Ry[2*n_cells + id] = my;
-    dev_bounds_Ry[3*n_cells + id] = mz;
-    dev_bounds_Ry[4*n_cells + id] = E;
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_bounds_Ry[(5+i)*n_cells + id] = scalar[i];
+#endif  // SCALAR
+#ifdef MHD
+    dev_bounds_Ly[(grid_enum::Q_y_magnetic_z)*n_cells + id] = cellCenteredBz;
+    dev_bounds_Ly[(grid_enum::Q_y_magnetic_x)*n_cells + id] = cellCenteredBx;
+#endif  // MHD
+#ifdef DE
+    dev_bounds_Ly[(n_fields - 1) * n_cells + id] = ge;
+#endif  // DE
+
+    // Send the z+1/2 Left interface
+    dev_bounds_Lz[id]               = d;
+    dev_bounds_Lz[n_cells + id]     = mx;
+    dev_bounds_Lz[2 * n_cells + id] = my;
+    dev_bounds_Lz[3 * n_cells + id] = mz;
+    dev_bounds_Lz[4 * n_cells + id] = E;
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dev_bounds_Lz[(5 + i) * n_cells + id] = scalar[i];
     }
-    #endif
-    #ifdef DE
-    dev_bounds_Ry[(n_fields-1)*n_cells + id] = ge;
-    #endif
-  }
-
-  // z direction
-  if (xid < nx && yid < ny && zid < nz-1)
-  {
-    // retrieve appropriate conserved variables
-    id = xid + yid*nx + zid*nx*ny;
-    d  = dev_conserved[            id];
-    mx = dev_conserved[  n_cells + id];
-    my = dev_conserved[2*n_cells + id];
-    mz = dev_conserved[3*n_cells + id];
-    E  = dev_conserved[4*n_cells + id];
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar[i] = dev_conserved[(5+i)*n_cells + id];
+#endif  // SCALAR
+#ifdef MHD
+    dev_bounds_Lz[(grid_enum::Q_z_magnetic_x)*n_cells + id] = cellCenteredBx;
+    dev_bounds_Lz[(grid_enum::Q_z_magnetic_y)*n_cells + id] = cellCenteredBy;
+#endif  // MHD
+#ifdef DE
+    dev_bounds_Lz[(n_fields - 1) * n_cells + id] = ge;
+#endif  // DE
+
+    // Send the x-1/2 Right interface
+    if (xid > 0) {
+      id                              = cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny);
+      dev_bounds_Rx[id]               = d;
+      dev_bounds_Rx[n_cells + id]     = mx;
+      dev_bounds_Rx[2 * n_cells + id] = my;
+      dev_bounds_Rx[3 * n_cells + id] = mz;
+      dev_bounds_Rx[4 * n_cells + id] = E;
+#ifdef SCALAR
+      for (int i = 0; i < NSCALARS; i++) {
+        dev_bounds_Rx[(5 + i) * n_cells + id] = scalar[i];
+      }
+#endif  // SCALAR
+#ifdef MHD
+      dev_bounds_Rx[(grid_enum::Q_x_magnetic_y)*n_cells + id] = cellCenteredBy;
+      dev_bounds_Rx[(grid_enum::Q_x_magnetic_z)*n_cells + id] = cellCenteredBz;
+#endif  // MHD
+#ifdef DE
+      dev_bounds_Rx[(n_fields - 1) * n_cells + id] = ge;
+#endif  // DE
     }
-    #endif
-    #ifdef DE
-    ge = dev_conserved[(n_fields-1)*n_cells + id];
-    #endif
 
-    // send values back from the kernel
-    dev_bounds_Lz[            id] = d;
-    dev_bounds_Lz[  n_cells + id] = mx;
-    dev_bounds_Lz[2*n_cells + id] = my;
-    dev_bounds_Lz[3*n_cells + id] = mz;
-    dev_bounds_Lz[4*n_cells + id] = E;
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_bounds_Lz[(5+i)*n_cells + id] = scalar[i];
+    if (yid > 0) {
+      // Send the y-1/2 Right interface
+      id                              = cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny);
+      dev_bounds_Ry[id]               = d;
+      dev_bounds_Ry[n_cells + id]     = mx;
+      dev_bounds_Ry[2 * n_cells + id] = my;
+      dev_bounds_Ry[3 * n_cells + id] = mz;
+      dev_bounds_Ry[4 * n_cells + id] = E;
+#ifdef SCALAR
+      for (int i = 0; i < NSCALARS; i++) {
+        dev_bounds_Ry[(5 + i) * n_cells + id] = scalar[i];
+      }
+#endif  // SCALAR
+#ifdef MHD
+      dev_bounds_Ry[(grid_enum::Q_y_magnetic_z)*n_cells + id] = cellCenteredBz;
+      dev_bounds_Ry[(grid_enum::Q_y_magnetic_x)*n_cells + id] = cellCenteredBx;
+#endif  // MHD
+#ifdef DE
+      dev_bounds_Ry[(n_fields - 1) * n_cells + id] = ge;
+#endif  // DE
     }
-    #endif
-    #ifdef DE
-    dev_bounds_Lz[(n_fields-1)*n_cells + id] = ge;
-    #endif
 
-    // retrieve appropriate conserved variables
-    id = xid + yid*nx + (zid+1)*nx*ny;
-    d  = dev_conserved[            id];
-    mx = dev_conserved[  n_cells + id];
-    my = dev_conserved[2*n_cells + id];
-    mz = dev_conserved[3*n_cells + id];
-    E  = dev_conserved[4*n_cells + id];
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar[i] = dev_conserved[(5+i)*n_cells + id];
+    if (zid > 0) {
+      // Send the z-1/2 Right interface
+      id                              = cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny);
+      dev_bounds_Rz[id]               = d;
+      dev_bounds_Rz[n_cells + id]     = mx;
+      dev_bounds_Rz[2 * n_cells + id] = my;
+      dev_bounds_Rz[3 * n_cells + id] = mz;
+      dev_bounds_Rz[4 * n_cells + id] = E;
+#ifdef SCALAR
+      for (int i = 0; i < NSCALARS; i++) {
+        dev_bounds_Rz[(5 + i) * n_cells + id] = scalar[i];
+      }
+#endif  // SCALAR
+#ifdef MHD
+      dev_bounds_Rz[(grid_enum::Q_z_magnetic_x)*n_cells + id] = cellCenteredBx;
+      dev_bounds_Rz[(grid_enum::Q_z_magnetic_y)*n_cells + id] = cellCenteredBy;
+#endif  // MHD
+#ifdef DE
+      dev_bounds_Rz[(n_fields - 1) * n_cells + id] = ge;
+#endif  // DE
     }
-    #endif
-    #ifdef DE
-    ge = dev_conserved[(n_fields-1)*n_cells + id];
-    #endif
-
-    // send values back from the kernel
-    id = xid + yid*nx + zid*nx*ny;
-    dev_bounds_Rz[            id] = d;
-    dev_bounds_Rz[  n_cells + id] = mx;
-    dev_bounds_Rz[2*n_cells + id] = my;
-    dev_bounds_Rz[3*n_cells + id] = mz;
-    dev_bounds_Rz[4*n_cells + id] = E;
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_bounds_Rz[(5+i)*n_cells + id] = scalar[i];
-    }
-    #endif
-    #ifdef DE
-    dev_bounds_Rz[(n_fields-1)*n_cells + id] = ge;
-    #endif
-
   }
 }
-
-
-#endif //CUDA
diff --git a/src/reconstruction/pcm_cuda.h b/src/reconstruction/pcm_cuda.h
index 6b66e44e3..dbf83fb65 100644
--- a/src/reconstruction/pcm_cuda.h
+++ b/src/reconstruction/pcm_cuda.h
@@ -1,24 +1,19 @@
 /*! \file pcm_cuda.h
  *  \brief Declarations of the cuda pcm kernels */
 
-#ifdef CUDA
-
 #ifndef PCM_CUDA_H
 #define PCM_CUDA_H
 
+__global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int n_cells,
+                                      int n_ghost, Real gamma, int n_fields);
 
-__global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int n_cells, int n_ghost, Real gamma, int n_fields);
-
-
-__global__ void PCM_Reconstruction_2D(Real *dev_conserved, Real *dev_bounds_Lx, Real *dev_bounds_Rx, Real *dev_bounds_Ly, Real *dev_bounds_Ry, int nx, int ny, int n_ghost, Real gamma, int n_fields);
-
-
-__global__ void PCM_Reconstruction_3D(Real *dev_conserved,
-                                      Real *dev_bounds_Lx, Real *dev_bounds_Rx,
-                                      Real *dev_bounds_Ly, Real *dev_bounds_Ry,
-                                      Real *dev_bounds_Lz, Real *dev_bounds_Rz,
-                                      int nx, int ny, int nz, int n_ghost, Real gamma, int n_fields);
+__global__ void PCM_Reconstruction_2D(Real *dev_conserved, Real *dev_bounds_Lx, Real *dev_bounds_Rx,
+                                      Real *dev_bounds_Ly, Real *dev_bounds_Ry, int nx, int ny, int n_ghost, Real gamma,
+                                      int n_fields);
 
-#endif // PCM_CUDA_H
-#endif // CUDA
+__global__ void PCM_Reconstruction_3D(Real *dev_conserved, Real *dev_bounds_Lx, Real *dev_bounds_Rx,
+                                      Real *dev_bounds_Ly, Real *dev_bounds_Ry, Real *dev_bounds_Lz,
+                                      Real *dev_bounds_Rz, int nx, int ny, int nz, int n_ghost, Real gamma,
+                                      int n_fields);
 
+#endif  // PCM_CUDA_H
diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu
index ea6b95aaa..bb31e9904 100644
--- a/src/reconstruction/plmc_cuda.cu
+++ b/src/reconstruction/plmc_cuda.cu
@@ -2,638 +2,299 @@
  *  \brief Definitions of the piecewise linear reconstruction functions with
            limiting applied in the characteristic variables, as described
            in Stone et al., 2008. */
-#ifdef CUDA
-#ifdef PLMC
 
-#include "../utils/gpu.hpp"
 #include <math.h>
+
 #include "../global/global.h"
 #include "../global/global_cuda.h"
 #include "../reconstruction/plmc_cuda.h"
+#include "../reconstruction/reconstruction.h"
+#include "../utils/cuda_utilities.h"
+#include "../utils/gpu.hpp"
+
+#ifdef DE  // PRESSURE_DE
+  #include "../utils/hydro_utilities.h"
+#endif  // DE
+
+/*! \fn __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real
+ *dev_bounds_R, int nx, int ny, int nz, Real dx, Real dt, Real
+ gamma, int dir)
+ *  \brief When passed a stencil of conserved variables, returns the left and
+ right boundary values for the interface calculated using plm. */
+__global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx,
+                                                 int ny, int nz, Real dx, Real dt, Real gamma, int dir, int n_fields)
+{
+  // get a thread ID
+  int const thread_id = threadIdx.x + blockIdx.x * blockDim.x;
+  int xid, yid, zid;
+  cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid);
 
-#ifdef DE //PRESSURE_DE
-#include "../utils/hydro_utilities.h"
-#endif
+  // Ensure that we are only operating on cells that will be used
+  if (reconstruction::Thread_Guard<2>(nx, ny, nz, xid, yid, zid)) {
+    return;
+  }
 
+  // Compute the total number of cells
+  int const n_cells = nx * ny * nz;
 
-/*! \fn __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir)
- *  \brief When passed a stencil of conserved variables, returns the left and right
-           boundary values for the interface calculated using plm. */
-__global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields)
-{
-  int n_cells = nx*ny*nz;
+  // Set the field indices for the various directions
   int o1, o2, o3;
-  if (dir == 0) {
-    o1 = 1; o2 = 2; o3 = 3;
+  switch (dir) {
+    case 0:
+      o1 = grid_enum::momentum_x;
+      o2 = grid_enum::momentum_y;
+      o3 = grid_enum::momentum_z;
+      break;
+    case 1:
+      o1 = grid_enum::momentum_y;
+      o2 = grid_enum::momentum_z;
+      o3 = grid_enum::momentum_x;
+      break;
+    case 2:
+      o1 = grid_enum::momentum_z;
+      o2 = grid_enum::momentum_x;
+      o3 = grid_enum::momentum_y;
+      break;
   }
-  if (dir == 1) {
-    o1 = 2; o2 = 3; o3 = 1;
-  }
-  if (dir == 2) {
-    o1 = 3; o2 = 1; o3 = 2;
+
+  // load the 3-cell stencil into registers
+  // cell i
+  reconstruction::Primitive const cell_i =
+      reconstruction::Load_Data(dev_conserved, xid, yid, zid, nx, ny, n_cells, o1, o2, o3, gamma);
+
+  // cell i-1. The equality checks the direction and will subtract one from the correct direction
+  reconstruction::Primitive const cell_imo = reconstruction::Load_Data(
+      dev_conserved, xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma);
+
+  // cell i+1. The equality checks the direction and add one to the correct direction
+  reconstruction::Primitive const cell_ipo = reconstruction::Load_Data(
+      dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma);
+
+  // calculate the adiabatic sound speed in cell i
+  Real const sound_speed         = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma);
+  Real const sound_speed_squared = sound_speed * sound_speed;
+
+// Compute the eigenvectors
+#ifdef MHD
+  reconstruction::EigenVecs const eigenvectors =
+      reconstruction::Compute_Eigenvectors(cell_i, sound_speed, sound_speed_squared, gamma);
+#else
+  reconstruction::EigenVecs eigenvectors;
+#endif  // MHD
+
+  // Compute the left, right, centered, and van Leer differences of the
+  // primitive variables Note that here L and R refer to locations relative to
+  // the cell center
+
+  // left
+  reconstruction::Primitive const del_L = reconstruction::Compute_Slope(cell_imo, cell_i);
+
+  // right
+  reconstruction::Primitive const del_R = reconstruction::Compute_Slope(cell_i, cell_ipo);
+
+  // centered
+  reconstruction::Primitive const del_C = reconstruction::Compute_Slope(cell_imo, cell_ipo, 0.5);
+
+  // Van Leer
+  reconstruction::Primitive const del_G = reconstruction::Van_Leer_Slope(del_L, del_R);
+
+  // Project the left, right, centered and van Leer differences onto the
+  // characteristic variables Stone Eqn 37 (del_a are differences in
+  // characteristic variables, see Stone for notation) Use the eigenvectors
+  // given in Stone 2008, Appendix A
+  reconstruction::Characteristic const del_a_L =
+      reconstruction::Primitive_To_Characteristic(cell_i, del_L, eigenvectors, sound_speed, sound_speed_squared, gamma);
+
+  reconstruction::Characteristic const del_a_R =
+      reconstruction::Primitive_To_Characteristic(cell_i, del_R, eigenvectors, sound_speed, sound_speed_squared, gamma);
+
+  reconstruction::Characteristic const del_a_C =
+      reconstruction::Primitive_To_Characteristic(cell_i, del_C, eigenvectors, sound_speed, sound_speed_squared, gamma);
+
+  reconstruction::Characteristic const del_a_G =
+      reconstruction::Primitive_To_Characteristic(cell_i, del_G, eigenvectors, sound_speed, sound_speed_squared, gamma);
+
+  // Apply monotonicity constraints to the differences in the characteristic variables and project the monotonized
+  // difference in the characteristic variables back onto the primitive variables Stone Eqn 39
+  reconstruction::Primitive del_m_i = reconstruction::Monotonize_Characteristic_Return_Primitive(
+      cell_i, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, eigenvectors, sound_speed,
+      sound_speed_squared, gamma);
+
+  // Compute the left and right interface values using the monotonized difference in the primitive variables
+  reconstruction::Primitive interface_L_iph = reconstruction::Calc_Interface_Linear(cell_i, del_m_i, 1.0);
+  reconstruction::Primitive interface_R_imh = reconstruction::Calc_Interface_Linear(cell_i, del_m_i, -1.0);
+
+  // Limit the interfaces
+  reconstruction::Plm_Limit_Interfaces(interface_L_iph, interface_R_imh, cell_imo, cell_i, cell_ipo);
+
+#ifndef VL
+
+  Real const dtodx = dt / dx;
+
+  // Compute the eigenvalues of the linearized equations in the
+  // primitive variables using the cell-centered primitive variables
+  Real const lambda_m = cell_i.velocity_x - sound_speed;
+  Real const lambda_0 = cell_i.velocity_x;
+  Real const lambda_p = cell_i.velocity_x + sound_speed;
+
+  // Integrate linear interpolation function over domain of dependence
+  // defined by max(min) eigenvalue
+  Real qx                    = -0.5 * fmin(lambda_m, 0.0) * dtodx;
+  interface_R_imh.density    = interface_R_imh.density + qx * del_m_i.density;
+  interface_R_imh.velocity_x = interface_R_imh.velocity_x + qx * del_m_i.velocity_x;
+  interface_R_imh.velocity_y = interface_R_imh.velocity_y + qx * del_m_i.velocity_y;
+  interface_R_imh.velocity_z = interface_R_imh.velocity_z + qx * del_m_i.velocity_z;
+  interface_R_imh.pressure   = interface_R_imh.pressure + qx * del_m_i.pressure;
+
+  qx                         = 0.5 * fmax(lambda_p, 0.0) * dtodx;
+  interface_L_iph.density    = interface_L_iph.density - qx * del_m_i.density;
+  interface_L_iph.velocity_x = interface_L_iph.velocity_x - qx * del_m_i.velocity_x;
+  interface_L_iph.velocity_y = interface_L_iph.velocity_y - qx * del_m_i.velocity_y;
+  interface_L_iph.velocity_z = interface_L_iph.velocity_z - qx * del_m_i.velocity_z;
+  interface_L_iph.pressure   = interface_L_iph.pressure - qx * del_m_i.pressure;
+
+  #ifdef DE
+  interface_R_imh.gas_energy = interface_R_imh.gas_energy + qx * del_m_i.gas_energy;
+  interface_L_iph.gas_energy = interface_L_iph.gas_energy - qx * del_m_i.gas_energy;
+  #endif  // DE
+
+  #ifdef SCALAR
+  for (int i = 0; i < NSCALARS; i++) {
+    interface_R_imh.scalar[i] = interface_R_imh.scalar[i] + qx * del_m_i.scalar[i];
+    interface_L_iph.scalar[i] = interface_L_iph.scalar[i] - qx * del_m_i.scalar[i];
   }
+  #endif  // SCALAR
+
+  // Perform the characteristic tracing
+  // Stone Eqns 42 & 43
 
-  // declare primitive variables for each stencil
-  // these will be placed into registers for each thread
-  Real d_i, vx_i, vy_i, vz_i, p_i;
-  Real d_imo, vx_imo, vy_imo, vz_imo, p_imo;
-  Real d_ipo, vx_ipo, vy_ipo, vz_ipo, p_ipo;
-
-  // declare other variables to be used
-  Real a_i;
-  Real del_d_L, del_vx_L, del_vy_L, del_vz_L, del_p_L;
-  Real del_d_R, del_vx_R, del_vy_R, del_vz_R, del_p_R;
-  Real del_d_C, del_vx_C, del_vy_C, del_vz_C, del_p_C;
-  Real del_d_G, del_vx_G, del_vy_G, del_vz_G, del_p_G;
-  Real del_a_0_L, del_a_1_L, del_a_2_L, del_a_3_L, del_a_4_L;
-  Real del_a_0_R, del_a_1_R, del_a_2_R, del_a_3_R, del_a_4_R;
-  Real del_a_0_C, del_a_1_C, del_a_2_C, del_a_3_C, del_a_4_C;
-  Real del_a_0_G, del_a_1_G, del_a_2_G, del_a_3_G, del_a_4_G;
-  Real del_a_0_m, del_a_1_m, del_a_2_m, del_a_3_m, del_a_4_m;
-  Real lim_slope_a, lim_slope_b;
-  Real del_d_m_i, del_vx_m_i, del_vy_m_i, del_vz_m_i, del_p_m_i;
-  Real d_L_iph, vx_L_iph, vy_L_iph, vz_L_iph, p_L_iph;
-  Real d_R_imh, vx_R_imh, vy_R_imh, vz_R_imh, p_R_imh;
-  Real C;
-  #ifndef VL
-  Real dtodx = dt/dx;
-  Real lambda_m, lambda_0, lambda_p;
-  Real qx;
-  Real lamdiff;
-  Real sum_0, sum_1, sum_2, sum_3, sum_4;
-  #endif // not VL
+  // left-hand interface value, i+1/2
+  Real sum_0 = 0.0, sum_1 = 0.0, sum_2 = 0.0, sum_3 = 0.0, sum_4 = 0.0;
   #ifdef DE
-  Real ge_i, ge_imo, ge_ipo;
-  Real del_ge_L, del_ge_R, del_ge_C, del_ge_G;
-  Real del_ge_m_i;
-  Real ge_L_iph, ge_R_imh;
-  Real E, E_kin, dge;
-  #ifndef VL
-  Real sum_ge;
-  #endif // not VL
-  #endif
+  Real sum_ge = 0;
+  #endif  // DE
   #ifdef SCALAR
-  Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS];
-  Real del_scalar_L[NSCALARS], del_scalar_R[NSCALARS], del_scalar_C[NSCALARS], del_scalar_G[NSCALARS];
-  Real del_scalar_m_i[NSCALARS];
-  Real scalar_L_iph[NSCALARS], scalar_R_imh[NSCALARS];
-  #ifndef VL
   Real sum_scalar[NSCALARS];
-  #endif // not VL
-  #endif
-
-  // get a thread ID
-  int blockId = blockIdx.x + blockIdx.y*gridDim.x;
-  int tid = threadIdx.x + blockId*blockDim.x;
-  int id;
-  int zid = tid / (nx*ny);
-  int yid = (tid - zid*nx*ny) / nx;
-  int xid = tid - zid*nx*ny - yid*nx;
-
-  int xs, xe, ys, ye, zs, ze;
-  if (dir == 0) {
-    xs = 1; xe = nx-2;
-    ys = 0; ye = ny;
-    zs = 0; ze = nz;
+  for (int i = 0; i < NSCALARS; i++) {
+    sum_scalar[i] = 0.0;
   }
-  if (dir == 1) {
-    xs = 0; xe = nx;
-    ys = 1; ye = ny-2;
-    zs = 0; ze = nz;
+  #endif  // SCALAR
+  if (lambda_m >= 0) {
+    Real lamdiff = lambda_p - lambda_m;
+
+    sum_0 += lamdiff *
+             (-cell_i.density * del_m_i.velocity_x / (2 * sound_speed) + del_m_i.pressure / (2 * sound_speed_squared));
+    sum_1 += lamdiff * (del_m_i.velocity_x / 2.0 - del_m_i.pressure / (2 * sound_speed * cell_i.density));
+    sum_4 += lamdiff * (-cell_i.density * del_m_i.velocity_x * sound_speed / 2.0 + del_m_i.pressure / 2.0);
   }
-  if (dir == 2) {
-    xs = 0; xe = nx;
-    ys = 0; ye = ny;
-    zs = 1; ze = nz-2;
-  }
-
-
-  if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze)
-  {
-    // load the 3-cell stencil into registers
-    // cell i
-    id = xid + yid*nx + zid*nx*ny;
-    d_i  =  dev_conserved[            id];
-    vx_i =  dev_conserved[o1*n_cells + id] / d_i;
-    vy_i =  dev_conserved[o2*n_cells + id] / d_i;
-    vz_i =  dev_conserved[o3*n_cells + id] / d_i;
-    #ifdef DE //PRESSURE_DE
-    E = dev_conserved[4*n_cells + id];
-    E_kin = 0.5 * d_i * ( vx_i*vx_i + vy_i*vy_i + vz_i*vz_i );
-    dge = dev_conserved[(n_fields-1)*n_cells + id];
-    p_i = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma );
-    #else
-    p_i  = (dev_conserved[4*n_cells + id] - 0.5*d_i*(vx_i*vx_i + vy_i*vy_i + vz_i*vz_i)) * (gamma - 1.0);
-    #endif //PRESSURE_DE
-    p_i  = fmax(p_i, (Real) TINY_NUMBER);
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_i[i] = dev_conserved[(5+i)*n_cells + id] / d_i;
-    }
-    #endif
-    #ifdef DE
-    ge_i =  dge / d_i;
-    #endif
-    // cell i-1
-    if (dir == 0) id = xid-1 + yid*nx + zid*nx*ny;
-    if (dir == 1) id = xid + (yid-1)*nx + zid*nx*ny;
-    if (dir == 2) id = xid + yid*nx + (zid-1)*nx*ny;
-    d_imo  =  dev_conserved[            id];
-    vx_imo =  dev_conserved[o1*n_cells + id] / d_imo;
-    vy_imo =  dev_conserved[o2*n_cells + id] / d_imo;
-    vz_imo =  dev_conserved[o3*n_cells + id] / d_imo;
-    #ifdef DE //PRESSURE_DE
-    E = dev_conserved[4*n_cells + id];
-    E_kin = 0.5 * d_imo * ( vx_imo*vx_imo + vy_imo*vy_imo + vz_imo*vz_imo );
-    dge = dev_conserved[(n_fields-1)*n_cells + id];
-    p_imo = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma );
-    #else
-    p_imo  = (dev_conserved[4*n_cells + id] - 0.5*d_imo*(vx_imo*vx_imo + vy_imo*vy_imo + vz_imo*vz_imo)) * (gamma - 1.0);
-    #endif //PRESSURE_DE
-    p_imo  = fmax(p_imo, (Real) TINY_NUMBER);
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_imo[i] = dev_conserved[(5+i)*n_cells + id] / d_imo;
-    }
-    #endif
-    #ifdef DE
-    ge_imo =  dge / d_imo;
-    #endif
-    // cell i+1
-    if (dir == 0) id = xid+1 + yid*nx + zid*nx*ny;
-    if (dir == 1) id = xid + (yid+1)*nx + zid*nx*ny;
-    if (dir == 2) id = xid + yid*nx + (zid+1)*nx*ny;
-    d_ipo  =  dev_conserved[            id];
-    vx_ipo =  dev_conserved[o1*n_cells + id] / d_ipo;
-    vy_ipo =  dev_conserved[o2*n_cells + id] / d_ipo;
-    vz_ipo =  dev_conserved[o3*n_cells + id] / d_ipo;
-    #ifdef DE //PRESSURE_DE
-    E = dev_conserved[4*n_cells + id];
-    E_kin = 0.5 * d_ipo * ( vx_ipo*vx_ipo + vy_ipo*vy_ipo + vz_ipo*vz_ipo );
-    dge = dev_conserved[(n_fields-1)*n_cells + id];
-    p_ipo = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma );
-    #else
-    p_ipo  = (dev_conserved[4*n_cells + id] - 0.5*d_ipo*(vx_ipo*vx_ipo + vy_ipo*vy_ipo + vz_ipo*vz_ipo)) * (gamma - 1.0);
-    #endif //PRESSURE_DE
-    p_ipo  = fmax(p_ipo, (Real) TINY_NUMBER);
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_ipo[i] = dev_conserved[(5+i)*n_cells + id] / d_ipo;
-    }
-    #endif
-    #ifdef DE
-    ge_ipo =  dge / d_ipo;
-    #endif
-
-
-    // calculate the adiabatic sound speed in cell i
-    a_i   = sqrt(gamma*p_i/d_i);
-
-
-    // Compute the eigenvalues of the linearized equations in the
-    // primitive variables using the cell-centered primitive variables
-    #ifndef VL
-    lambda_m = vx_i-a_i;
-    lambda_0 = vx_i;
-    lambda_p = vx_i+a_i;
-    #endif
-
-    // Compute the left, right, centered, and van Leer differences of the primitive variables
-    // Note that here L and R refer to locations relative to the cell center
-
-    // left
-    del_d_L  = d_i - d_imo;
-    del_vx_L = vx_i - vx_imo;
-    del_vy_L = vy_i - vy_imo;
-    del_vz_L = vz_i - vz_imo;
-    del_p_L  = p_i  - p_imo;
-
-    // right
-    del_d_R  = d_ipo  - d_i;
-    del_vx_R = vx_ipo - vx_i;
-    del_vy_R = vy_ipo - vy_i;
-    del_vz_R = vz_ipo - vz_i;
-    del_p_R  = p_ipo  - p_i;
-
-    // centered
-    del_d_C  = 0.5*(d_ipo - d_imo);
-    del_vx_C = 0.5*(vx_ipo - vx_imo);
-    del_vy_C = 0.5*(vy_ipo - vy_imo);
-    del_vz_C = 0.5*(vz_ipo - vz_imo);
-    del_p_C  = 0.5*(p_ipo - p_imo);
-
-    // Van Leer
-    if (del_d_L*del_d_R > 0.0) { del_d_G = 2.0*del_d_L*del_d_R / (del_d_L+del_d_R); }
-    else { del_d_G = 0.0; }
-    if (del_vx_L*del_vx_R > 0.0) { del_vx_G = 2.0*del_vx_L*del_vx_R / (del_vx_L+del_vx_R); }
-    else { del_vx_G = 0.0; }
-    if (del_vy_L*del_vy_R > 0.0) { del_vy_G = 2.0*del_vy_L*del_vy_R / (del_vy_L+del_vy_R); }
-    else { del_vy_G = 0.0; }
-    if (del_vz_L*del_vz_R > 0.0) { del_vz_G = 2.0*del_vz_L*del_vz_R / (del_vz_L+del_vz_R); }
-    else { del_vz_G = 0.0; }
-    if (del_p_L*del_p_R > 0.0) { del_p_G = 2.0*del_p_L*del_p_R / (del_p_L+del_p_R); }
-    else { del_p_G = 0.0; }
-
-    #ifdef DE
-    del_ge_L = ge_i - ge_imo;
-    del_ge_R = ge_ipo - ge_i;
-    del_ge_C = 0.5*(ge_ipo - ge_imo);
-    if (del_ge_L*del_ge_R > 0.0) { del_ge_G = 2.0*del_ge_L*del_ge_R / (del_ge_L+del_ge_R); }
-    else { del_ge_G = 0.0; }
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      del_scalar_L[i] = scalar_i[i] - scalar_imo[i];
-      del_scalar_R[i] = scalar_ipo[i] - scalar_i[i];
-      del_scalar_C[i] = 0.5*(scalar_ipo[i] - scalar_imo[i]);
-      if (del_scalar_L[i]*del_scalar_R[i] > 0.0) { del_scalar_G[i] = 2.0*del_scalar_L[i]*del_scalar_R[i] / (del_scalar_L[i]+del_scalar_R[i]); }
-      else { del_scalar_G[i] = 0.0; }
-    }
-    #endif
-
-
-    // Project the left, right, centered and van Leer differences onto the characteristic variables
-    // Stone Eqn 37 (del_a are differences in characteristic variables, see Stone for notation)
-    // Use the eigenvectors given in Stone 2008, Appendix A
-    del_a_0_L = -d_i * del_vx_L / (2*a_i) + del_p_L / (2*a_i*a_i);
-    del_a_1_L = del_d_L - del_p_L / (a_i*a_i);
-    del_a_2_L = del_vy_L;
-    del_a_3_L = del_vz_L;
-    del_a_4_L = d_i * del_vx_L / (2*a_i) + del_p_L / (2*a_i*a_i);
-
-    del_a_0_R = -d_i * del_vx_R / (2*a_i) + del_p_R / (2*a_i*a_i);
-    del_a_1_R = del_d_R - del_p_R / (a_i*a_i);
-    del_a_2_R = del_vy_R;
-    del_a_3_R = del_vz_R;
-    del_a_4_R = d_i * del_vx_R / (2*a_i) + del_p_R / (2*a_i*a_i);
-
-    del_a_0_C = -d_i * del_vx_C / (2*a_i) + del_p_C / (2*a_i*a_i);
-    del_a_1_C = del_d_C - del_p_C / (a_i*a_i);
-    del_a_2_C = del_vy_C;
-    del_a_3_C = del_vz_C;
-    del_a_4_C = d_i * del_vx_C / (2*a_i) + del_p_C / (2*a_i*a_i);
-
-    del_a_0_G = -d_i * del_vx_G / (2*a_i) + del_p_G / (2*a_i*a_i);
-    del_a_1_G = del_d_G - del_p_G / (a_i*a_i);
-    del_a_2_G = del_vy_G;
-    del_a_3_G = del_vz_G;
-    del_a_4_G = d_i * del_vx_G / (2*a_i) + del_p_G / (2*a_i*a_i);
-
-
-    // Apply monotonicity constraints to the differences in the characteristic variables
-
-    del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0;
-
-    if (del_a_0_L*del_a_0_R > 0.0) {
-      lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R));
-      lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G));
-      del_a_0_m = sgn_CUDA(del_a_0_C) * fmin(2.0*lim_slope_a, lim_slope_b);
-    }
-    if (del_a_1_L*del_a_1_R > 0.0) {
-      lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R));
-      lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G));
-      del_a_1_m = sgn_CUDA(del_a_1_C) * fmin(2.0*lim_slope_a, lim_slope_b);
-    }
-    if (del_a_2_L*del_a_2_R > 0.0) {
-      lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R));
-      lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G));
-      del_a_2_m = sgn_CUDA(del_a_2_C) * fmin(2.0*lim_slope_a, lim_slope_b);
-    }
-    if (del_a_3_L*del_a_3_R > 0.0) {
-      lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R));
-      lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G));
-      del_a_3_m = sgn_CUDA(del_a_3_C) * fmin(2.0*lim_slope_a, lim_slope_b);
-    }
-    if (del_a_4_L*del_a_4_R > 0.0) {
-      lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R));
-      lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G));
-      del_a_4_m = sgn_CUDA(del_a_4_C) * fmin(2.0*lim_slope_a, lim_slope_b);
-    }
-    #ifdef DE
-    del_ge_m_i = 0.0;
-    if (del_ge_L*del_ge_R > 0.0) {
-      lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R));
-      lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G));
-      del_ge_m_i = sgn_CUDA(del_ge_C) * fmin(2.0*lim_slope_a, lim_slope_b);
-    }
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      del_scalar_m_i[i] = 0.0;
-      if (del_scalar_L[i]*del_scalar_R[i] > 0.0) {
-        lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i]));
-        lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i]));
-        del_scalar_m_i[i] = sgn_CUDA(del_scalar_C[i]) * fmin(2.0*lim_slope_a, lim_slope_b);
-      }
-    }
-    #endif
-
-
-
-    // Project the monotonized difference in the characteristic variables back onto the
-    // primitive variables
-    // Stone Eqn 39
-    del_d_m_i  = del_a_0_m + del_a_1_m + del_a_4_m;
-    del_vx_m_i = -a_i*del_a_0_m / d_i + a_i* del_a_4_m / d_i;
-    del_vy_m_i = del_a_2_m;
-    del_vz_m_i = del_a_3_m;
-    del_p_m_i  = a_i*a_i*del_a_0_m + a_i*a_i*del_a_4_m;
-
-
-    // Compute the left and right interface values using the monotonized difference in the
-    // primitive variables
-
-    d_R_imh  = d_i  - 0.5*del_d_m_i;
-    vx_R_imh = vx_i - 0.5*del_vx_m_i;
-    vy_R_imh = vy_i - 0.5*del_vy_m_i;
-    vz_R_imh = vz_i - 0.5*del_vz_m_i;
-    p_R_imh  = p_i  - 0.5*del_p_m_i;
-
-    d_L_iph  = d_i  + 0.5*del_d_m_i;
-    vx_L_iph = vx_i + 0.5*del_vx_m_i;
-    vy_L_iph = vy_i + 0.5*del_vy_m_i;
-    vz_L_iph = vz_i + 0.5*del_vz_m_i;
-    p_L_iph  = p_i  + 0.5*del_p_m_i;
-
-    #ifdef DE
-    ge_R_imh = ge_i - 0.5*del_ge_m_i;
-    ge_L_iph = ge_i + 0.5*del_ge_m_i;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_R_imh[i] = scalar_i[i] - 0.5*del_scalar_m_i[i];
-      scalar_L_iph[i] = scalar_i[i] + 0.5*del_scalar_m_i[i];
-    }
-    #endif
-
-
-    C = d_R_imh + d_L_iph;
-    d_R_imh = fmax( fmin(d_i, d_imo), d_R_imh );
-    d_R_imh = fmin( fmax(d_i, d_imo), d_R_imh );
-    d_L_iph = C - d_R_imh;
-    d_L_iph = fmax( fmin(d_i, d_ipo), d_L_iph );
-    d_L_iph = fmin( fmax(d_i, d_ipo), d_L_iph );
-    d_R_imh = C - d_L_iph;
-
-    C = vx_R_imh + vx_L_iph;
-    vx_R_imh = fmax( fmin(vx_i, vx_imo), vx_R_imh );
-    vx_R_imh = fmin( fmax(vx_i, vx_imo), vx_R_imh );
-    vx_L_iph = C - vx_R_imh;
-    vx_L_iph = fmax( fmin(vx_i, vx_ipo), vx_L_iph );
-    vx_L_iph = fmin( fmax(vx_i, vx_ipo), vx_L_iph );
-    vx_R_imh = C - vx_L_iph;
-
-    C = vy_R_imh + vy_L_iph;
-    vy_R_imh = fmax( fmin(vy_i, vy_imo), vy_R_imh );
-    vy_R_imh = fmin( fmax(vy_i, vy_imo), vy_R_imh );
-    vy_L_iph = C - vy_R_imh;
-    vy_L_iph = fmax( fmin(vy_i, vy_ipo), vy_L_iph );
-    vy_L_iph = fmin( fmax(vy_i, vy_ipo), vy_L_iph );
-    vy_R_imh = C - vy_L_iph;
-
-    C = vz_R_imh + vz_L_iph;
-    vz_R_imh = fmax( fmin(vz_i, vz_imo), vz_R_imh );
-    vz_R_imh = fmin( fmax(vz_i, vz_imo), vz_R_imh );
-    vz_L_iph = C - vz_R_imh;
-    vz_L_iph = fmax( fmin(vz_i, vz_ipo), vz_L_iph );
-    vz_L_iph = fmin( fmax(vz_i, vz_ipo), vz_L_iph );
-    vz_R_imh = C - vz_L_iph;
-
-    C = p_R_imh + p_L_iph;
-    p_R_imh = fmax( fmin(p_i, p_imo), p_R_imh );
-    p_R_imh = fmin( fmax(p_i, p_imo), p_R_imh );
-    p_L_iph = C - p_R_imh;
-    p_L_iph = fmax( fmin(p_i, p_ipo), p_L_iph );
-    p_L_iph = fmin( fmax(p_i, p_ipo), p_L_iph );
-    p_R_imh = C - p_L_iph;
-
-    del_d_m_i  = d_L_iph  - d_R_imh;
-    del_vx_m_i = vx_L_iph - vx_R_imh;
-    del_vy_m_i = vy_L_iph - vy_R_imh;
-    del_vz_m_i = vz_L_iph - vz_R_imh;
-    del_p_m_i  = p_L_iph  - p_R_imh;
-
-    #ifdef DE
-    C = ge_R_imh + ge_L_iph;
-    ge_R_imh = fmax( fmin(ge_i, ge_imo), ge_R_imh );
-    ge_R_imh = fmin( fmax(ge_i, ge_imo), ge_R_imh );
-    ge_L_iph = C - ge_R_imh;
-    ge_L_iph = fmax( fmin(ge_i, ge_ipo), ge_L_iph );
-    ge_L_iph = fmin( fmax(ge_i, ge_ipo), ge_L_iph );
-    ge_R_imh = C - ge_L_iph;
-    del_ge_m_i = ge_L_iph - ge_R_imh;
-    #endif
-
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      C = scalar_R_imh[i] + scalar_L_iph[i];
-      scalar_R_imh[i] = fmax( fmin(scalar_i[i], scalar_imo[i]), scalar_R_imh[i] );
-      scalar_R_imh[i] = fmin( fmax(scalar_i[i], scalar_imo[i]), scalar_R_imh[i] );
-      scalar_L_iph[i] = C - scalar_R_imh[i];
-      scalar_L_iph[i] = fmax( fmin(scalar_i[i], scalar_ipo[i]), scalar_L_iph[i] );
-      scalar_L_iph[i] = fmin( fmax(scalar_i[i], scalar_ipo[i]), scalar_L_iph[i] );
-      scalar_R_imh[i] = C - scalar_L_iph[i];
-      del_scalar_m_i[i] = scalar_L_iph[i] - scalar_R_imh[i];
-    }
-    #endif
-
-
-    #ifndef VL
-    // Integrate linear interpolation function over domain of dependence
-    // defined by max(min) eigenvalue
-    qx = -0.5*fmin(lambda_m, 0.0)*dtodx;
-    d_R_imh  = d_R_imh  + qx * del_d_m_i;
-    vx_R_imh = vx_R_imh + qx * del_vx_m_i;
-    vy_R_imh = vy_R_imh + qx * del_vy_m_i;
-    vz_R_imh = vz_R_imh + qx * del_vz_m_i;
-    p_R_imh  = p_R_imh  + qx * del_p_m_i;
-
-    qx = 0.5*fmax(lambda_p, 0.0)*dtodx;
-    d_L_iph  = d_L_iph  - qx * del_d_m_i;
-    vx_L_iph = vx_L_iph - qx * del_vx_m_i;
-    vy_L_iph = vy_L_iph - qx * del_vy_m_i;
-    vz_L_iph = vz_L_iph - qx * del_vz_m_i;
-    p_L_iph  = p_L_iph  - qx * del_p_m_i;
-
-    #ifdef DE
-    ge_R_imh = ge_R_imh + qx * del_ge_m_i;
-    ge_L_iph = ge_L_iph - qx * del_ge_m_i;
-    #endif
-
-    #ifdef  SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_R_imh[i] = scalar_R_imh[i] + qx * del_scalar_m_i[i];
-      scalar_L_iph[i] = scalar_L_iph[i] - qx * del_scalar_m_i[i];
-    }
-    #endif
+  if (lambda_0 >= 0) {
+    Real lamdiff = lambda_p - lambda_0;
 
-
-    // Perform the characteristic tracing
-    // Stone Eqns 42 & 43
-
-    // left-hand interface value, i+1/2
-    sum_0 = sum_1 = sum_2 = sum_3 = sum_4 = 0;
-    #ifdef DE
-    sum_ge = 0;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      sum_scalar[i] = 0.0;
-    }
-    #endif
-    if (lambda_m >= 0)
-    {
-      lamdiff = lambda_p - lambda_m;
-
-      sum_0 += lamdiff * (-d_i*del_vx_m_i/(2*a_i) + del_p_m_i/(2*a_i*a_i));
-      sum_1 += lamdiff * (del_vx_m_i/2.0 - del_p_m_i/(2*a_i*d_i));
-      sum_4 += lamdiff * (-d_i*del_vx_m_i*a_i/2.0 + del_p_m_i/2.0);
-    }
-    if (lambda_0 >= 0)
-    {
-      lamdiff = lambda_p - lambda_0;
-
-      sum_0 += lamdiff * (del_d_m_i - del_p_m_i/(a_i*a_i));
-      sum_2 += lamdiff * del_vy_m_i;
-      sum_3 += lamdiff * del_vz_m_i;
-      #ifdef DE
-      sum_ge += lamdiff * del_ge_m_i;
-      #endif
-      #ifdef SCALAR
-      for (int i=0; i<NSCALARS; i++) {
-        sum_scalar[i] += lamdiff * del_scalar_m_i[i];
-      }
-      #endif
-    }
-    if (lambda_p >= 0)
-    {
-      lamdiff = lambda_p - lambda_p;
-
-      sum_0 += lamdiff * (d_i*del_vx_m_i/(2*a_i) + del_p_m_i/(2*a_i*a_i));
-      sum_1 += lamdiff * (del_vx_m_i/2.0 + del_p_m_i/(2*a_i*d_i));
-      sum_4 += lamdiff * (d_i*del_vx_m_i*a_i/2.0 + del_p_m_i/2.0);
+    sum_0 += lamdiff * (del_m_i.density - del_m_i.pressure / (sound_speed_squared));
+    sum_2 += lamdiff * del_m_i.velocity_y;
+    sum_3 += lamdiff * del_m_i.velocity_z;
+  #ifdef DE
+    sum_ge += lamdiff * del_m_i.gas_energy;
+  #endif  // DE
+  #ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      sum_scalar[i] += lamdiff * del_m_i.scalar[i];
     }
+  #endif  // SCALAR
+  }
+  if (lambda_p >= 0) {
+    Real lamdiff = lambda_p - lambda_p;
 
-    // add the corrections to the initial guesses for the interface values
-    d_L_iph  += 0.5*dtodx*sum_0;
-    vx_L_iph += 0.5*dtodx*sum_1;
-    vy_L_iph += 0.5*dtodx*sum_2;
-    vz_L_iph += 0.5*dtodx*sum_3;
-    p_L_iph  += 0.5*dtodx*sum_4;
-    #ifdef DE
-    ge_L_iph += 0.5*dtodx*sum_ge;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_L_iph[i] += 0.5*dtodx*sum_scalar[i];
-    }
-    #endif
+    sum_0 += lamdiff *
+             (cell_i.density * del_m_i.velocity_x / (2 * sound_speed) + del_m_i.pressure / (2 * sound_speed_squared));
+    sum_1 += lamdiff * (del_m_i.velocity_x / 2.0 + del_m_i.pressure / (2 * sound_speed * cell_i.density));
+    sum_4 += lamdiff * (cell_i.density * del_m_i.velocity_x * sound_speed / 2.0 + del_m_i.pressure / 2.0);
+  }
 
+  // add the corrections to the initial guesses for the interface values
+  interface_L_iph.density += 0.5 * dtodx * sum_0;
+  interface_L_iph.velocity_x += 0.5 * dtodx * sum_1;
+  interface_L_iph.velocity_y += 0.5 * dtodx * sum_2;
+  interface_L_iph.velocity_z += 0.5 * dtodx * sum_3;
+  interface_L_iph.pressure += 0.5 * dtodx * sum_4;
+  #ifdef DE
+  interface_L_iph.gas_energy += 0.5 * dtodx * sum_ge;
+  #endif  // DE
+  #ifdef SCALAR
+  for (int i = 0; i < NSCALARS; i++) {
+    interface_L_iph.scalar[i] += 0.5 * dtodx * sum_scalar[i];
+  }
+  #endif  // SCALAR
 
-    // right-hand interface value, i-1/2
-    sum_0 = sum_1 = sum_2 = sum_3 = sum_4 = 0;
-    #ifdef DE
-    sum_ge = 0;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      sum_scalar[i] = 0;
-    }
-    #endif
-    if (lambda_m <= 0)
-    {
-      lamdiff = lambda_m - lambda_m;
-
-      sum_0 += lamdiff * (-d_i*del_vx_m_i/(2*a_i) + del_p_m_i/(2*a_i*a_i));
-      sum_1 += lamdiff * (del_vx_m_i/2.0 - del_p_m_i/(2*a_i*d_i));
-      sum_4 += lamdiff * (-d_i*del_vx_m_i*a_i/2.0 + del_p_m_i/2.0);
-    }
-    if (lambda_0 <= 0)
-    {
-      lamdiff = lambda_m - lambda_0;
-
-      sum_0 += lamdiff * (del_d_m_i - del_p_m_i/(a_i*a_i));
-      sum_2 += lamdiff * del_vy_m_i;
-      sum_3 += lamdiff * del_vz_m_i;
-      #ifdef DE
-      sum_ge += lamdiff * del_ge_m_i;
-      #endif
-      #ifdef SCALAR
-      for (int i=0; i<NSCALARS; i++) {
-        sum_scalar[i] += lamdiff * del_scalar_m_i[i];
-      }
-      #endif
-    }
-    if (lambda_p <= 0)
-    {
-      lamdiff = lambda_m - lambda_p;
+  // right-hand interface value, i-1/2
+  sum_0 = sum_1 = sum_2 = sum_3 = sum_4 = 0;
+  #ifdef DE
+  sum_ge = 0;
+  #endif  // DE
+  #ifdef SCALAR
+  for (int i = 0; i < NSCALARS; i++) {
+    sum_scalar[i] = 0;
+  }
+  #endif  // SCALAR
+  if (lambda_m <= 0) {
+    Real lamdiff = lambda_m - lambda_m;
+
+    sum_0 += lamdiff *
+             (-cell_i.density * del_m_i.velocity_x / (2 * sound_speed) + del_m_i.pressure / (2 * sound_speed_squared));
+    sum_1 += lamdiff * (del_m_i.velocity_x / 2.0 - del_m_i.pressure / (2 * sound_speed * cell_i.density));
+    sum_4 += lamdiff * (-cell_i.density * del_m_i.velocity_x * sound_speed / 2.0 + del_m_i.pressure / 2.0);
+  }
+  if (lambda_0 <= 0) {
+    Real lamdiff = lambda_m - lambda_0;
 
-      sum_0 += lamdiff * (d_i*del_vx_m_i/(2*a_i) + del_p_m_i/(2*a_i*a_i));
-      sum_1 += lamdiff * (del_vx_m_i/2.0 + del_p_m_i/(2*a_i*d_i));
-      sum_4 += lamdiff * (d_i*del_vx_m_i*a_i/2.0 + del_p_m_i/2.0);
+    sum_0 += lamdiff * (del_m_i.density - del_m_i.pressure / (sound_speed_squared));
+    sum_2 += lamdiff * del_m_i.velocity_y;
+    sum_3 += lamdiff * del_m_i.velocity_z;
+  #ifdef DE
+    sum_ge += lamdiff * del_m_i.gas_energy;
+  #endif  // DE
+  #ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      sum_scalar[i] += lamdiff * del_m_i.scalar[i];
     }
+  #endif  // SCALAR
+  }
+  if (lambda_p <= 0) {
+    Real lamdiff = lambda_m - lambda_p;
 
-    // add the corrections
-    d_R_imh  += 0.5*dtodx*sum_0;
-    vx_R_imh += 0.5*dtodx*sum_1;
-    vy_R_imh += 0.5*dtodx*sum_2;
-    vz_R_imh += 0.5*dtodx*sum_3;
-    p_R_imh  += 0.5*dtodx*sum_4;
-    #ifdef DE
-    ge_R_imh += 0.5*dtodx*sum_ge;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_R_imh[i] += 0.5*dtodx*sum_scalar[i];
-    }
-    #endif
-    #endif // not VL
-
-    // apply minimum constraints
-    d_R_imh = fmax(d_R_imh, (Real) TINY_NUMBER);
-    d_L_iph = fmax(d_L_iph, (Real) TINY_NUMBER);
-    p_R_imh = fmax(p_R_imh, (Real) TINY_NUMBER);
-    p_L_iph = fmax(p_L_iph, (Real) TINY_NUMBER);
-
-    // Convert the left and right states in the primitive to the conserved variables
-    // send final values back from kernel
-    // bounds_R refers to the right side of the i-1/2 interface
-    if (dir == 0) id = xid-1 + yid*nx + zid*nx*ny;
-    if (dir == 1) id = xid + (yid-1)*nx + zid*nx*ny;
-    if (dir == 2) id = xid + yid*nx + (zid-1)*nx*ny;
-    dev_bounds_R[            id] = d_R_imh;
-    dev_bounds_R[o1*n_cells + id] = d_R_imh*vx_R_imh;
-    dev_bounds_R[o2*n_cells + id] = d_R_imh*vy_R_imh;
-    dev_bounds_R[o3*n_cells + id] = d_R_imh*vz_R_imh;
-    dev_bounds_R[4*n_cells + id] = (p_R_imh/(gamma-1.0)) + 0.5*d_R_imh*(vx_R_imh*vx_R_imh + vy_R_imh*vy_R_imh + vz_R_imh*vz_R_imh);
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_bounds_R[(5+i)*n_cells + id] = d_R_imh*scalar_R_imh[i];
-    }
-    #endif
-    #ifdef DE
-    dev_bounds_R[(n_fields-1)*n_cells + id] = d_R_imh*ge_R_imh;
-    #endif
-    // bounds_L refers to the left side of the i+1/2 interface
-    id = xid + yid*nx + zid*nx*ny;
-    dev_bounds_L[            id] = d_L_iph;
-    dev_bounds_L[o1*n_cells + id] = d_L_iph*vx_L_iph;
-    dev_bounds_L[o2*n_cells + id] = d_L_iph*vy_L_iph;
-    dev_bounds_L[o3*n_cells + id] = d_L_iph*vz_L_iph;
-    dev_bounds_L[4*n_cells + id] = (p_L_iph/(gamma-1.0)) + 0.5*d_L_iph*(vx_L_iph*vx_L_iph + vy_L_iph*vy_L_iph + vz_L_iph*vz_L_iph);
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_bounds_L[(5+i)*n_cells + id] = d_L_iph*scalar_L_iph[i];
-    }
-    #endif
-    #ifdef DE
-    dev_bounds_L[(n_fields-1)*n_cells + id] = d_L_iph*ge_L_iph;
-    #endif
+    sum_0 += lamdiff *
+             (cell_i.density * del_m_i.velocity_x / (2 * sound_speed) + del_m_i.pressure / (2 * sound_speed_squared));
+    sum_1 += lamdiff * (del_m_i.velocity_x / 2.0 + del_m_i.pressure / (2 * sound_speed * cell_i.density));
+    sum_4 += lamdiff * (cell_i.density * del_m_i.velocity_x * sound_speed / 2.0 + del_m_i.pressure / 2.0);
+  }
 
+  // add the corrections
+  interface_R_imh.density += 0.5 * dtodx * sum_0;
+  interface_R_imh.velocity_x += 0.5 * dtodx * sum_1;
+  interface_R_imh.velocity_y += 0.5 * dtodx * sum_2;
+  interface_R_imh.velocity_z += 0.5 * dtodx * sum_3;
+  interface_R_imh.pressure += 0.5 * dtodx * sum_4;
+  #ifdef DE
+  interface_R_imh.gas_energy += 0.5 * dtodx * sum_ge;
+  #endif  // DE
+  #ifdef SCALAR
+  for (int i = 0; i < NSCALARS; i++) {
+    interface_R_imh.scalar[i] += 0.5 * dtodx * sum_scalar[i];
   }
+  #endif  // SCALAR
+#endif    // CTU
+
+  // apply minimum constraints
+  interface_R_imh.density  = fmax(interface_R_imh.density, (Real)TINY_NUMBER);
+  interface_L_iph.density  = fmax(interface_L_iph.density, (Real)TINY_NUMBER);
+  interface_R_imh.pressure = fmax(interface_R_imh.pressure, (Real)TINY_NUMBER);
+  interface_L_iph.pressure = fmax(interface_L_iph.pressure, (Real)TINY_NUMBER);
+
+  // Convert the left and right states in the primitive to the conserved variables send final values back from kernel
+  // bounds_R refers to the right side of the i-1/2 interface
+  size_t id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny);
+  reconstruction::Write_Data(interface_L_iph, dev_bounds_L, dev_conserved, id, n_cells, o1, o2, o3, gamma);
+
+  id = cuda_utilities::compute1DIndex(xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny);
+  reconstruction::Write_Data(interface_R_imh, dev_bounds_R, dev_conserved, id, n_cells, o1, o2, o3, gamma);
 }
-
-
-
-#endif //PLMC
-#endif //CUDA
diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h
index 9b38324ff..c2d25df84 100644
--- a/src/reconstruction/plmc_cuda.h
+++ b/src/reconstruction/plmc_cuda.h
@@ -1,20 +1,21 @@
 /*! \file plmc_cuda.h
- *  \brief Declarations of the cuda plm kernels, characteristic reconstruction version. */
-
-#ifdef CUDA
-#ifdef PLMC
+ *  \brief Declarations of the cuda plm kernels, characteristic reconstruction
+ * version. */
 
 #ifndef PLMC_CUDA_H
 #define PLMC_CUDA_H
 
 #include "../global/global.h"
+#include "../grid/grid_enum.h"
+#include "../utils/hydro_utilities.h"
+#include "../utils/mhd_utilities.h"
 
-/*! \fn __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir)
- *  \brief When passed a stencil of conserved variables, returns the left and right
-           boundary values for the interface calculated using plm. */
-__global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields);
-
+/*! \fn __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real
+ *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real
+ gamma, int dir)
+ *  \brief When passed a stencil of conserved variables, returns the left and
+ right boundary values for the interface calculated using plm. */
+__global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx,
+                                                 int ny, int nz, Real dx, Real dt, Real gamma, int dir, int n_fields);
 
-#endif // PLMC_CUDA_H
-#endif // PLMC
-#endif // CUDA
+#endif  // PLMC_CUDA_H
diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu
new file mode 100644
index 000000000..678f6329d
--- /dev/null
+++ b/src/reconstruction/plmc_cuda_tests.cu
@@ -0,0 +1,293 @@
+/*!
+ * \file plmc_cuda_tests.cu
+ * \brief Tests for the contents of plmc_cuda.h and plmc_cuda.cu
+ *
+ */
+
+// STL Includes
+#include <random>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+// External Includes
+#include <gtest/gtest.h>  // Include GoogleTest and related libraries/headers
+
+// Local Includes
+#include <algorithm>
+
+#include "../global/global.h"
+#include "../io/io.h"
+#include "../reconstruction/plmc_cuda.h"
+#include "../utils/DeviceVector.h"
+#include "../utils/hydro_utilities.h"
+#include "../utils/testing_utilities.h"
+
+TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput)
+{
+#ifndef VL
+  std::cerr << "Warning: The tHYDROPlmcReconstructor.CorrectInputExpectCorrectOutput only supports the Van Leer (VL) "
+               "integrator"
+            << std::endl;
+  return;
+#endif  // VL
+  // Set up PRNG to use
+  std::mt19937_64 prng(42);
+  std::uniform_real_distribution<double> doubleRand(0.1, 5);
+
+  // Mock up needed information
+  size_t const nx       = 5;
+  size_t const ny       = 4;
+  size_t const nz       = 4;
+  size_t const n_fields = 5;
+  double const dx       = doubleRand(prng);
+  double const dt       = doubleRand(prng);
+  double const gamma    = 5.0 / 3.0;
+
+  // Setup host grid. Fill host grid with random values and randomly assign maximum value
+  std::vector<double> host_grid(nx * ny * nz * n_fields);
+  for (Real &val : host_grid) {
+    val = doubleRand(prng);
+  }
+
+  // Allocating and copying to device
+  cuda_utilities::DeviceVector<double> dev_grid(host_grid.size());
+  dev_grid.cpyHostToDevice(host_grid);
+
+  // Fiducial Data
+  std::vector<std::unordered_map<int, double>> fiducial_interface_left = {{{26, 3.8877922383184833},
+                                                                           {27, 0.70033864721549188},
+                                                                           {106, 5.6625525038177784},
+                                                                           {107, 3.0633780053857027},
+                                                                           {186, 4.0069556576401011},
+                                                                           {187, 2.1015872413794123},
+                                                                           {266, 5.1729859852329314},
+                                                                           {267, 3.9675148506537838},
+                                                                           {346, 9.6301414677176531},
+                                                                           {347, 21.091316282933843}},
+                                                                          {{21, 0.74780807318015607},
+                                                                           {37, 0.19457128219588618},
+                                                                           {101, 5.6515522777659895},
+                                                                           {117, 4.4286255636679313},
+                                                                           {181, 0.13115998072061905},
+                                                                           {197, 2.2851440769830953},
+                                                                           {261, 1.5834637771067519},
+                                                                           {277, 2.697375839048191},
+                                                                           {341, 23.043749364531674},
+                                                                           {357, 82.515887983144168}},
+                                                                          {{25, 2.2863650183226212},
+                                                                           {29, 1.686415421301841},
+                                                                           {105, 0.72340346106443465},
+                                                                           {109, 5.9563546443402542},
+                                                                           {185, 3.6128571662018358},
+                                                                           {189, 5.3735653401079038},
+                                                                           {265, 0.95177493689267167},
+                                                                           {269, 0.46056494878491938},
+                                                                           {345, 3.1670194578067843},
+                                                                           {349, 19.142817472509272}}};
+
+  std::vector<std::unordered_map<int, double>> fiducial_interface_right =
+
+      {{{25, 3.8877922383184833},
+        {26, 0.70033864721549188},
+        {105, 1.594778794367564},
+        {106, 3.0633780053857027},
+        {185, 4.0069556576401011},
+        {186, 2.1015872413794123},
+        {265, 1.7883678016935782},
+        {266, 3.9675148506537838},
+        {345, 2.8032969746372531},
+        {346, 21.091316282933843}},
+       {{17, 0.43265217076853835},
+        {33, 0.19457128219588618},
+        {97, 3.2697645945288754},
+        {113, 4.4286255636679313},
+        {177, 0.07588397666718491},
+        {193, 2.2851440769830953},
+        {257, 0.91612950577699748},
+        {273, 2.697375839048191},
+        {337, 13.332201861384396},
+        {353, 82.515887983144168}},
+       {{5, 2.2863650183226212},
+        {9, 1.686415421301841},
+        {85, 0.72340346106443465},
+        {89, 1.77925054463361},
+        {165, 5.3997753452111859},
+        {169, 1.4379190463124141},
+        {245, 0.95177493689267167},
+        {249, 0.46056494878491938},
+        {325, 6.6889498465051398},
+        {329, 1.6145084086614285}}}
+
+  ;
+
+  // Loop over different directions
+  for (size_t direction = 0; direction < 3; direction++) {
+    // Assign the shape
+    size_t nx_rot, ny_rot, nz_rot;
+    switch (direction) {
+      case 0:
+        nx_rot = nx;
+        ny_rot = ny;
+        nz_rot = nz;
+        break;
+      case 1:
+        nx_rot = ny;
+        ny_rot = nz;
+        nz_rot = nx;
+        break;
+      case 2:
+        nx_rot = nz;
+        ny_rot = nx;
+        nz_rot = ny;
+        break;
+    }
+
+    // Allocate device buffers
+    cuda_utilities::DeviceVector<double> dev_interface_left(host_grid.size(), true);
+    cuda_utilities::DeviceVector<double> dev_interface_right(host_grid.size(), true);
+
+    // Launch kernel
+    hipLaunchKernelGGL(PLMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(),
+                       dev_interface_right.data(), nx_rot, ny_rot, nz_rot, dx, dt, gamma, direction, n_fields);
+    GPU_Error_Check();
+    GPU_Error_Check(cudaDeviceSynchronize());
+
+    // Perform Comparison
+    for (size_t i = 0; i < host_grid.size(); i++) {
+      // Check the left interface
+      double test_val = dev_interface_left.at(i);
+      double fiducial_val =
+          (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end())
+              ? 0.0
+              : fiducial_interface_left.at(direction)[i];
+
+      testing_utilities::Check_Results(
+          fiducial_val, test_val,
+          "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction));
+
+      // Check the right interface
+      test_val     = dev_interface_right.at(i);
+      fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end())
+                         ? 0.0
+                         : fiducial_interface_right.at(direction)[i];
+
+      // if (test_val != 0.0) std::cout << "{" << i << ", " << to_string_exact(test_val) << "}," << std::endl;
+
+      testing_utilities::Check_Results(
+          fiducial_val, test_val,
+          "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction));
+    }
+  }
+}
+
+TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput)
+{
+  // Set up PRNG to use
+  std::mt19937_64 prng(42);
+  std::uniform_real_distribution<double> doubleRand(0.1, 5);
+
+  // Mock up needed information
+  size_t const nx = 4, ny = nx, nz = nx;
+  size_t const n_fields          = 8;
+  size_t const n_cells_grid      = nx * ny * nz * n_fields;
+  size_t const n_cells_interface = nx * ny * nz * (n_fields - 1);
+  double const dx                = doubleRand(prng);
+  double const dt                = doubleRand(prng);
+  double const gamma             = 5.0 / 3.0;
+
+  // Setup host grid. Fill host grid with random values and randomly assign maximum value
+  std::vector<double> host_grid(n_cells_grid);
+  for (Real &val : host_grid) {
+    val = doubleRand(prng);
+  }
+
+  // Allocating and copying to device
+  cuda_utilities::DeviceVector<double> dev_grid(host_grid.size());
+  dev_grid.cpyHostToDevice(host_grid);
+
+  // Fiducial Data
+  std::vector<std::unordered_map<int, double>> fiducial_interface_left  = {{{21, 0.59023012197434721},
+                                                                            {85, 3.0043379408547275},
+                                                                            {149, 2.6320759184913625},
+                                                                            {213, 0.9487867623146744},
+                                                                            {277, 18.551193003661723},
+                                                                            {341, 1.8587936590169301},
+                                                                            {405, 2.1583975283044725}},
+                                                                           {{21, 0.73640639402573249},
+                                                                            {85, 3.3462413154443715},
+                                                                            {149, 2.1945584994458125},
+                                                                            {213, 1.1837630990406585},
+                                                                            {277, 17.570011907061254},
+                                                                            {341, 2.1583975283044725},
+                                                                            {405, 1.7033818819502551}},
+                                                                           {{21, 0.25340904981266843},
+                                                                            {85, 2.0441984720128734},
+                                                                            {149, 1.9959059157695584},
+                                                                            {213, 0.45377591914009824},
+                                                                            {277, 24.018953780483471},
+                                                                            {341, 1.7033818819502551},
+                                                                            {405, 1.8587936590169301}}};
+  std::vector<std::unordered_map<int, double>> fiducial_interface_right = {{{20, 0.59023012197434721},
+                                                                            {84, 3.0043379408547275},
+                                                                            {148, 2.6320759184913625},
+                                                                            {212, 0.9487867623146744},
+                                                                            {276, 22.111134849009044},
+                                                                            {340, 1.8587936590169301},
+                                                                            {404, 2.1583975283044725}},
+                                                                           {
+                                                                               {17, 0.44405384992296193},
+                                                                               {81, 2.5027813113931279},
+                                                                               {145, 2.6371119205792346},
+                                                                               {209, 1.0210845222961809},
+                                                                               {273, 21.353253570231175},
+                                                                               {337, 2.1634182515826184},
+                                                                               {401, 1.7033818819502551},
+                                                                           },
+                                                                           {
+                                                                               {5, 0.92705119413602599},
+                                                                               {69, 1.9592598982258778},
+                                                                               {133, 0.96653490574340428},
+                                                                               {197, 1.3203867992383289},
+                                                                               {261, 7.9217487636977353},
+                                                                               {325, 1.8629714367312684},
+                                                                               {389, 1.8587936590169301},
+                                                                           }};
+
+  // Loop over different directions
+  for (size_t direction = 0; direction < 3; direction++) {
+    // Allocate device buffers
+    cuda_utilities::DeviceVector<double> dev_interface_left(n_cells_interface, true);
+    cuda_utilities::DeviceVector<double> dev_interface_right(n_cells_interface, true);
+
+    // Launch kernel
+    hipLaunchKernelGGL(PLMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(),
+                       dev_interface_right.data(), nx, ny, nz, dx, dt, gamma, direction, n_fields);
+    GPU_Error_Check();
+    GPU_Error_Check(cudaDeviceSynchronize());
+
+    // Perform Comparison
+    for (size_t i = 0; i < dev_interface_right.size(); i++) {
+      // Check the left interface
+      double test_val = dev_interface_left.at(i);
+      double fiducial_val =
+          (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end())
+              ? 0.0
+              : fiducial_interface_left.at(direction)[i];
+
+      testing_utilities::Check_Results(
+          fiducial_val, test_val,
+          "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction));
+
+      // Check the right interface
+      test_val     = dev_interface_right.at(i);
+      fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end())
+                         ? 0.0
+                         : fiducial_interface_right.at(direction)[i];
+
+      testing_utilities::Check_Results(
+          fiducial_val, test_val,
+          "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction));
+    }
+  }
+}
diff --git a/src/reconstruction/plmp_cuda.cu b/src/reconstruction/plmp_cuda.cu
index 2a6b637f7..e8cfa0d09 100644
--- a/src/reconstruction/plmp_cuda.cu
+++ b/src/reconstruction/plmp_cuda.cu
@@ -1,34 +1,42 @@
 /*! \file plmp_cuda.cu
  *  \brief Definitions of the piecewise linear reconstruction functions for
            with limiting in the primitive variables. */
-#ifdef CUDA
 
-#include "../utils/gpu.hpp"
 #include <math.h>
+
 #include "../global/global.h"
 #include "../global/global_cuda.h"
 #include "../reconstruction/plmp_cuda.h"
+#include "../utils/gpu.hpp"
 
-#ifdef DE //PRESSURE_DE
-#include "../utils/hydro_utilities.h"
+#ifdef DE  // PRESSURE_DE
+  #include "../utils/hydro_utilities.h"
 #endif
 
-
-/*! \fn __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields)
- *  \brief When passed a stencil of conserved variables, returns the left and right
-           boundary values for the interface calculated using plm. */
-__global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields)
+/*! \fn __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real
+ *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real
+ gamma, int dir, int n_fields)
+ *  \brief When passed a stencil of conserved variables, returns the left and
+ right boundary values for the interface calculated using plm. */
+__global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz,
+                          int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields)
 {
-  int n_cells = nx*ny*nz;
+  int n_cells = nx * ny * nz;
   int o1, o2, o3;
   if (dir == 0) {
-    o1 = 1; o2 = 2; o3 = 3;
+    o1 = 1;
+    o2 = 2;
+    o3 = 3;
   }
   if (dir == 1) {
-    o1 = 2; o2 = 3; o3 = 1;
+    o1 = 2;
+    o2 = 3;
+    o3 = 1;
   }
   if (dir == 2) {
-    o1 = 3; o2 = 1; o3 = 2;
+    o1 = 3;
+    o2 = 1;
+    o3 = 2;
   }
 
   // declare primitive variables in the stencil
@@ -44,193 +52,213 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou
   Real mx_L, my_L, mz_L, E_L;
   Real mx_R, my_R, mz_R, E_R;
 
-  #ifdef DE
+#ifdef DE
   Real ge_i, ge_imo, ge_ipo, ge_L, ge_R, dge_L, dge_R, E_kin, E, dge;
-  #endif
-  #ifdef SCALAR
+#endif  // DE
+#ifdef SCALAR
   Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS];
   Real scalar_L[NSCALARS], scalar_R[NSCALARS], dscalar_L[NSCALARS], dscalar_R[NSCALARS];
-  #endif
+#endif  // SCALAR
 
-  #ifndef VL //Don't use velocities to reconstruct when using VL
-  Real dtodx = dt/dx;
+#ifndef VL  // Don't use velocities to reconstruct when using VL
+  Real dtodx = dt / dx;
   Real dfl, dfr, mxfl, mxfr, myfl, myfr, mzfl, mzfr, Efl, Efr;
   #ifdef DE
   Real gefl, gefr;
-  #endif
+  #endif  // DE
   #ifdef SCALAR
   Real scalarfl[NSCALARS], scalarfr[NSCALARS];
-  #endif
-  #endif
+  #endif  // SCALAR
+#endif    // VL
 
   // get a thread ID
-  int blockId = blockIdx.x + blockIdx.y*gridDim.x;
-  int tid = threadIdx.x + blockId*blockDim.x;
+  int blockId = blockIdx.x + blockIdx.y * gridDim.x;
+  int tid     = threadIdx.x + blockId * blockDim.x;
   int id;
-  int zid = tid / (nx*ny);
-  int yid = (tid - zid*nx*ny) / nx;
-  int xid = tid - zid*nx*ny - yid*nx;
+  int zid = tid / (nx * ny);
+  int yid = (tid - zid * nx * ny) / nx;
+  int xid = tid - zid * nx * ny - yid * nx;
 
   int xs, xe, ys, ye, zs, ze;
   if (dir == 0) {
-    xs = 1; xe = nx-2;
-    ys = 0; ye = ny;
-    zs = 0; ze = nz;
+    xs = 1;
+    xe = nx - 2;
+    ys = 0;
+    ye = ny;
+    zs = 0;
+    ze = nz;
   }
   if (dir == 1) {
-    xs = 0; xe = nx;
-    ys = 1; ye = ny-2;
-    zs = 0; ze = nz;
+    xs = 0;
+    xe = nx;
+    ys = 1;
+    ye = ny - 2;
+    zs = 0;
+    ze = nz;
   }
   if (dir == 2) {
-    xs = 0; xe = nx;
-    ys = 0; ye = ny;
-    zs = 1; ze = nz-2;
+    xs = 0;
+    xe = nx;
+    ys = 0;
+    ye = ny;
+    zs = 1;
+    ze = nz - 2;
   }
 
-
-  if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze)
-  {
+  if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze) {
     // load the 3-cell stencil into registers
     // cell i
-    id = xid + yid*nx + zid*nx*ny;
-    d_i  =  dev_conserved[            id];
-    vx_i =  dev_conserved[o1*n_cells + id] / d_i;
-    vy_i =  dev_conserved[o2*n_cells + id] / d_i;
-    vz_i =  dev_conserved[o3*n_cells + id] / d_i;
-    #ifdef DE //PRESSURE_DE
-    E = dev_conserved[4*n_cells + id];
-    E_kin = 0.5 * d_i * ( vx_i*vx_i + vy_i*vy_i + vz_i*vz_i );
-    dge = dev_conserved[(n_fields-1)*n_cells + id];
-    p_i = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma );
-    #else
-    p_i  = (dev_conserved[4*n_cells + id] - 0.5*d_i*(vx_i*vx_i + vy_i*vy_i + vz_i*vz_i)) * (gamma - 1.0);
-    #endif //PRESSURE_DE
-    p_i  = fmax(p_i, (Real) TINY_NUMBER);
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_i[i] = dev_conserved[(5+i)*n_cells + id] / d_i;
+    id   = xid + yid * nx + zid * nx * ny;
+    d_i  = dev_conserved[id];
+    vx_i = dev_conserved[o1 * n_cells + id] / d_i;
+    vy_i = dev_conserved[o2 * n_cells + id] / d_i;
+    vz_i = dev_conserved[o3 * n_cells + id] / d_i;
+#ifdef DE  // PRESSURE_DE
+    E     = dev_conserved[4 * n_cells + id];
+    E_kin = 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i);
+    dge   = dev_conserved[(n_fields - 1) * n_cells + id];
+    p_i   = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma);
+#else
+    p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0);
+#endif  // PRESSURE_DE
+    p_i = fmax(p_i, (Real)TINY_NUMBER);
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      scalar_i[i] = dev_conserved[(5 + i) * n_cells + id] / d_i;
     }
-    #endif
-    #ifdef DE
+#endif  // SCALAR
+#ifdef DE
     ge_i = dge / d_i;
-    #endif
+#endif  // DE
     // cell i-1
-    if (dir == 0) id = xid-1 + yid*nx + zid*nx*ny;
-    if (dir == 1) id = xid + (yid-1)*nx + zid*nx*ny;
-    if (dir == 2) id = xid + yid*nx + (zid-1)*nx*ny;
-    d_imo  =  dev_conserved[            id];
-    vx_imo =  dev_conserved[o1*n_cells + id] / d_imo;
-    vy_imo =  dev_conserved[o2*n_cells + id] / d_imo;
-    vz_imo =  dev_conserved[o3*n_cells + id] / d_imo;
-    #ifdef DE //PRESSURE_DE
-    E = dev_conserved[4*n_cells + id];
-    E_kin = 0.5 * d_imo * ( vx_imo*vx_imo + vy_imo*vy_imo + vz_imo*vz_imo );
-    dge = dev_conserved[(n_fields-1)*n_cells + id];
-    p_imo = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma );
-    #else
-    p_imo  = (dev_conserved[4*n_cells + id] - 0.5*d_imo*(vx_imo*vx_imo + vy_imo*vy_imo + vz_imo*vz_imo)) * (gamma - 1.0);
-    #endif //PRESSURE_DE
-    p_imo  = fmax(p_imo, (Real) TINY_NUMBER);
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_imo[i] = dev_conserved[(5+i)*n_cells + id] / d_imo;
+    if (dir == 0) {
+      id = xid - 1 + yid * nx + zid * nx * ny;
+    }
+    if (dir == 1) {
+      id = xid + (yid - 1) * nx + zid * nx * ny;
     }
-    #endif
-    #ifdef DE
+    if (dir == 2) {
+      id = xid + yid * nx + (zid - 1) * nx * ny;
+    }
+    d_imo  = dev_conserved[id];
+    vx_imo = dev_conserved[o1 * n_cells + id] / d_imo;
+    vy_imo = dev_conserved[o2 * n_cells + id] / d_imo;
+    vz_imo = dev_conserved[o3 * n_cells + id] / d_imo;
+#ifdef DE  // PRESSURE_DE
+    E     = dev_conserved[4 * n_cells + id];
+    E_kin = 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo);
+    dge   = dev_conserved[(n_fields - 1) * n_cells + id];
+    p_imo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma);
+#else
+    p_imo = (dev_conserved[4 * n_cells + id] - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) *
+            (gamma - 1.0);
+#endif  // PRESSURE_DE
+    p_imo = fmax(p_imo, (Real)TINY_NUMBER);
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      scalar_imo[i] = dev_conserved[(5 + i) * n_cells + id] / d_imo;
+    }
+#endif  // SCALAR
+#ifdef DE
     ge_imo = dge / d_imo;
-    #endif
+#endif  // DE
     // cell i+1
-    if (dir == 0) id = xid+1 + yid*nx + zid*nx*ny;
-    if (dir == 1) id = xid + (yid+1)*nx + zid*nx*ny;
-    if (dir == 2) id = xid + yid*nx + (zid+1)*nx*ny;
-    d_ipo  =  dev_conserved[            id];
-    vx_ipo =  dev_conserved[o1*n_cells + id] / d_ipo;
-    vy_ipo =  dev_conserved[o2*n_cells + id] / d_ipo;
-    vz_ipo =  dev_conserved[o3*n_cells + id] / d_ipo;
-    #ifdef DE //PRESSURE_DE
-    E = dev_conserved[4*n_cells + id];
-    E_kin = 0.5 * d_ipo * ( vx_ipo*vx_ipo + vy_ipo*vy_ipo + vz_ipo*vz_ipo );
-    dge = dev_conserved[(n_fields-1)*n_cells + id];
-    p_ipo = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma );
-    #else
-    p_ipo  = (dev_conserved[4*n_cells + id] - 0.5*d_ipo*(vx_ipo*vx_ipo + vy_ipo*vy_ipo + vz_ipo*vz_ipo)) * (gamma - 1.0);
-    #endif //PRESSURE_DE
-    p_ipo  = fmax(p_ipo, (Real) TINY_NUMBER);
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_ipo[i] = dev_conserved[(5+i)*n_cells + id] / d_ipo;
+    if (dir == 0) {
+      id = xid + 1 + yid * nx + zid * nx * ny;
     }
-    #endif
-    #ifdef DE
-    ge_ipo =  dge / d_ipo;
-    #endif
-
+    if (dir == 1) {
+      id = xid + (yid + 1) * nx + zid * nx * ny;
+    }
+    if (dir == 2) {
+      id = xid + yid * nx + (zid + 1) * nx * ny;
+    }
+    d_ipo  = dev_conserved[id];
+    vx_ipo = dev_conserved[o1 * n_cells + id] / d_ipo;
+    vy_ipo = dev_conserved[o2 * n_cells + id] / d_ipo;
+    vz_ipo = dev_conserved[o3 * n_cells + id] / d_ipo;
+#ifdef DE  // PRESSURE_DE
+    E     = dev_conserved[4 * n_cells + id];
+    E_kin = 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo);
+    dge   = dev_conserved[(n_fields - 1) * n_cells + id];
+    p_ipo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma);
+#else
+    p_ipo = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) *
+            (gamma - 1.0);
+#endif  // PRESSURE_DE
+    p_ipo = fmax(p_ipo, (Real)TINY_NUMBER);
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      scalar_ipo[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipo;
+    }
+#endif  // SCALAR
+#ifdef DE
+    ge_ipo = dge / d_ipo;
+#endif  // DE
 
     // Calculate the interface values for each primitive variable
-    Interface_Values_PLM(d_imo,  d_i,  d_ipo,  &d_L,  &d_R);
+    Interface_Values_PLM(d_imo, d_i, d_ipo, &d_L, &d_R);
     Interface_Values_PLM(vx_imo, vx_i, vx_ipo, &vx_L, &vx_R);
     Interface_Values_PLM(vy_imo, vy_i, vy_ipo, &vy_L, &vy_R);
     Interface_Values_PLM(vz_imo, vz_i, vz_ipo, &vz_L, &vz_R);
-    Interface_Values_PLM(p_imo,  p_i,  p_ipo,  &p_L,  &p_R);
-    #ifdef DE
-    Interface_Values_PLM(ge_imo,  ge_i,  ge_ipo,  &ge_L,  &ge_R);
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      Interface_Values_PLM(scalar_imo[i],  scalar_i[i],  scalar_ipo[i],  &scalar_L[i],  &scalar_R[i]);
+    Interface_Values_PLM(p_imo, p_i, p_ipo, &p_L, &p_R);
+#ifdef DE
+    Interface_Values_PLM(ge_imo, ge_i, ge_ipo, &ge_L, &ge_R);
+#endif  // DE
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      Interface_Values_PLM(scalar_imo[i], scalar_i[i], scalar_ipo[i], &scalar_L[i], &scalar_R[i]);
     }
-    #endif
+#endif  // SCALAR
 
     // Apply mimimum constraints
-    d_L = fmax(d_L, (Real) TINY_NUMBER);
-    d_R = fmax(d_R, (Real) TINY_NUMBER);
-    p_L = fmax(p_L, (Real) TINY_NUMBER);
-    p_R = fmax(p_R, (Real) TINY_NUMBER);
+    d_L = fmax(d_L, (Real)TINY_NUMBER);
+    d_R = fmax(d_R, (Real)TINY_NUMBER);
+    p_L = fmax(p_L, (Real)TINY_NUMBER);
+    p_R = fmax(p_R, (Real)TINY_NUMBER);
 
     // calculate the conserved variables at each interface
-    mx_L = d_L*vx_L;
-    mx_R = d_R*vx_R;
-    my_L = d_L*vy_L;
-    my_R = d_R*vy_R;
-    mz_L = d_L*vz_L;
-    mz_R = d_R*vz_R;
-    E_L = p_L/(gamma-1.0) + 0.5*d_L*(vx_L*vx_L + vy_L*vy_L + vz_L*vz_L);
-    E_R = p_R/(gamma-1.0) + 0.5*d_R*(vx_R*vx_R + vy_R*vy_R + vz_R*vz_R);
-    #ifdef DE
-    dge_L = d_L*ge_L;
-    dge_R = d_R*ge_R;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dscalar_L[i] = d_L*scalar_L[i];
-      dscalar_R[i] = d_R*scalar_R[i];
+    mx_L = d_L * vx_L;
+    mx_R = d_R * vx_R;
+    my_L = d_L * vy_L;
+    my_R = d_R * vy_R;
+    mz_L = d_L * vz_L;
+    mz_R = d_R * vz_R;
+    E_L  = p_L / (gamma - 1.0) + 0.5 * d_L * (vx_L * vx_L + vy_L * vy_L + vz_L * vz_L);
+    E_R  = p_R / (gamma - 1.0) + 0.5 * d_R * (vx_R * vx_R + vy_R * vy_R + vz_R * vz_R);
+#ifdef DE
+    dge_L = d_L * ge_L;
+    dge_R = d_R * ge_R;
+#endif  // DE
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dscalar_L[i] = d_L * scalar_L[i];
+      dscalar_R[i] = d_R * scalar_R[i];
     }
-    #endif
+#endif  // SCALAR
 
-    // #ifdef CTU
-    #ifndef VL //Don't use velocities to reconstruct when using VL
+// #ifdef CTU
+#ifndef VL  // Don't use velocities to reconstruct when using VL
     // calculate fluxes for each variable
-    dfl = mx_L;
-    dfr = mx_R;
-    mxfl = mx_L*vx_L + p_L;
-    mxfr = mx_R*vx_R + p_R;
-    myfl = mx_L*vy_L;
-    myfr = mx_R*vy_R;
-    mzfl = mx_L*vz_L;
-    mzfr = mx_R*vz_R;
-    Efl = (E_L + p_L) * vx_L;
-    Efr = (E_R + p_R) * vx_R;
-    #ifdef DE
-    gefl = dge_L*vx_L;
-    gefr = dge_R*vx_R;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalarfl[i] = dscalar_L[i]*vx_L;
-      scalarfr[i] = dscalar_R[i]*vx_R;
+    dfl  = mx_L;
+    dfr  = mx_R;
+    mxfl = mx_L * vx_L + p_L;
+    mxfr = mx_R * vx_R + p_R;
+    myfl = mx_L * vy_L;
+    myfr = mx_R * vy_R;
+    mzfl = mx_L * vz_L;
+    mzfr = mx_R * vz_R;
+    Efl  = (E_L + p_L) * vx_L;
+    Efr  = (E_R + p_R) * vx_R;
+  #ifdef DE
+    gefl = dge_L * vx_L;
+    gefr = dge_R * vx_R;
+  #endif  // DE
+  #ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      scalarfl[i] = dscalar_L[i] * vx_L;
+      scalarfr[i] = dscalar_R[i] * vx_R;
     }
-    #endif
+  #endif  // SCALAR
 
     // Evolve the boundary extrapolated values half a timestep.
     d_L += 0.5 * (dtodx) * (dfl - dfr);
@@ -243,93 +271,95 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou
     mz_R += 0.5 * (dtodx) * (mzfl - mzfr);
     E_L += 0.5 * (dtodx) * (Efl - Efr);
     E_R += 0.5 * (dtodx) * (Efl - Efr);
-    #ifdef DE
+  #ifdef DE
     dge_L += 0.5 * (dtodx) * (gefl - gefr);
     dge_R += 0.5 * (dtodx) * (gefl - gefr);
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
+  #endif  // DE
+  #ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
       dscalar_L[i] += 0.5 * (dtodx) * (scalarfl[i] - scalarfr[i]);
       dscalar_R[i] += 0.5 * (dtodx) * (scalarfl[i] - scalarfr[i]);
     }
-    #endif
+  #endif  // SCALAR
 
-    #endif //NO VL
+#endif  // NO VL
 
-    // Convert the left and right states in the primitive to the conserved variables
-    // send final values back from kernel
-    // bounds_R refers to the right side of the i-1/2 interface
-    if (dir == 0) id = xid-1 + yid*nx + zid*nx*ny;
-    if (dir == 1) id = xid + (yid-1)*nx + zid*nx*ny;
-    if (dir == 2) id = xid + yid*nx + (zid-1)*nx*ny;
-    dev_bounds_R[            id] = d_L;
-    dev_bounds_R[o1*n_cells + id] = mx_L;
-    dev_bounds_R[o2*n_cells + id] = my_L;
-    dev_bounds_R[o3*n_cells + id] = mz_L;
-    dev_bounds_R[4*n_cells + id] = E_L;
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_bounds_R[(5+i)*n_cells + id] = dscalar_L[i];
+    // Convert the left and right states in the primitive to the conserved
+    // variables send final values back from kernel bounds_R refers to the right
+    // side of the i-1/2 interface
+    if (dir == 0) {
+      id = xid - 1 + yid * nx + zid * nx * ny;
+    }
+    if (dir == 1) {
+      id = xid + (yid - 1) * nx + zid * nx * ny;
     }
-    #endif
-    #ifdef DE
-    dev_bounds_R[(n_fields-1)*n_cells + id] = dge_L;
-    #endif
+    if (dir == 2) {
+      id = xid + yid * nx + (zid - 1) * nx * ny;
+    }
+    dev_bounds_R[id]                = d_L;
+    dev_bounds_R[o1 * n_cells + id] = mx_L;
+    dev_bounds_R[o2 * n_cells + id] = my_L;
+    dev_bounds_R[o3 * n_cells + id] = mz_L;
+    dev_bounds_R[4 * n_cells + id]  = E_L;
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dev_bounds_R[(5 + i) * n_cells + id] = dscalar_L[i];
+    }
+#endif  // SCALAR
+#ifdef DE
+    dev_bounds_R[(n_fields - 1) * n_cells + id] = dge_L;
+#endif  // DE
     // bounds_L refers to the left side of the i+1/2 interface
-    id = xid + yid*nx + zid*nx*ny;
-    dev_bounds_L[            id] = d_R;
-    dev_bounds_L[o1*n_cells + id] = mx_R;
-    dev_bounds_L[o2*n_cells + id] = my_R;
-    dev_bounds_L[o3*n_cells + id] = mz_R;
-    dev_bounds_L[4*n_cells + id] = E_R;
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_bounds_L[(5+i)*n_cells + id] = dscalar_R[i];
+    id                              = xid + yid * nx + zid * nx * ny;
+    dev_bounds_L[id]                = d_R;
+    dev_bounds_L[o1 * n_cells + id] = mx_R;
+    dev_bounds_L[o2 * n_cells + id] = my_R;
+    dev_bounds_L[o3 * n_cells + id] = mz_R;
+    dev_bounds_L[4 * n_cells + id]  = E_R;
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dev_bounds_L[(5 + i) * n_cells + id] = dscalar_R[i];
     }
-    #endif
-    #ifdef DE
-    dev_bounds_L[(n_fields-1)*n_cells + id] = dge_R;
-    #endif
-
+#endif  // SCALAR
+#ifdef DE
+    dev_bounds_L[(n_fields - 1) * n_cells + id] = dge_R;
+#endif  // DE
   }
 }
 
-
-
 __device__ void Interface_Values_PLM(Real q_imo, Real q_i, Real q_ipo, Real *q_L, Real *q_R)
 {
   Real del_q_L, del_q_R, del_q_C, del_q_G;
   Real lim_slope_a, lim_slope_b, del_q_m;
 
-  // Compute the left, right, centered, and Van Leer differences of the primitive variables
-  // Note that here L and R refer to locations relative to the cell center
+  // Compute the left, right, centered, and Van Leer differences of the
+  // primitive variables Note that here L and R refer to locations relative to
+  // the cell center
 
   // left
-  del_q_L  = q_i - q_imo;
+  del_q_L = q_i - q_imo;
   // right
-  del_q_R  = q_ipo - q_i;
+  del_q_R = q_ipo - q_i;
   // centered
-  del_q_C  = 0.5*(q_ipo - q_imo);
+  del_q_C = 0.5 * (q_ipo - q_imo);
   // Van Leer
-  if (del_q_L*del_q_R > 0.0) { del_q_G = 2.0*del_q_L*del_q_R / (del_q_L+del_q_R); }
-  else { del_q_G = 0.0; }
+  if (del_q_L * del_q_R > 0.0) {
+    del_q_G = 2.0 * del_q_L * del_q_R / (del_q_L + del_q_R);
+  } else {
+    del_q_G = 0.0;
+  }
 
   // Monotonize the differences
   lim_slope_a = fmin(fabs(del_q_L), fabs(del_q_R));
   lim_slope_b = fmin(fabs(del_q_C), fabs(del_q_G));
 
   // Minmod limiter
-  //del_q_m = sgn_CUDA(del_q_C)*fmin(2.0*lim_slope_a, fabs(del_q_C));
+  // del_q_m = sgn_CUDA(del_q_C)*fmin(2.0*lim_slope_a, fabs(del_q_C));
 
   // Van Leer limiter
-  del_q_m = sgn_CUDA(del_q_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b);
-
+  del_q_m = sgn_CUDA(del_q_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b);
 
   // Calculate the left and right interface values using the limited slopes
-  *q_L = q_i - 0.5*del_q_m;
-  *q_R = q_i + 0.5*del_q_m;
-
+  *q_L = q_i - 0.5 * del_q_m;
+  *q_R = q_i + 0.5 * del_q_m;
 }
-
-
-#endif //CUDA
diff --git a/src/reconstruction/plmp_cuda.h b/src/reconstruction/plmp_cuda.h
index 9cf5f01a3..34faa14df 100644
--- a/src/reconstruction/plmp_cuda.h
+++ b/src/reconstruction/plmp_cuda.h
@@ -1,25 +1,24 @@
 /*! \file plmp_cuda.h
  *  \brief Declarations of the cuda plmp kernels. */
 
-#ifdef CUDA
-
 #ifndef PLMP_CUDA_H
 #define PLMP_CUDA_H
 
-
 #include "../global/global.h"
 
-/*! \fn __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields)
- *  \brief When passed a stencil of conserved variables, returns the left and right
-           boundary values for the interface calculated using plmp. */
-__global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields);
-
-
-/*! \fn __device__ void Interface_Values_PLM(Real q_imo, Real q_i, Real q_ipo, Real *q_L, Real *q_R)
- *  \brief Calculates the left and right interface values for a cell using linear reconstruction
-           in the primitive variables with Van Leer or Minmod slope limiting. */
+/*! \fn __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real
+ *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real
+ gamma, int dir, int n_fields)
+ *  \brief When passed a stencil of conserved variables, returns the left and
+ right boundary values for the interface calculated using plmp. */
+__global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz,
+                          int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields);
+
+/*! \fn __device__ void Interface_Values_PLM(Real q_imo, Real q_i, Real q_ipo,
+ Real *q_L, Real *q_R)
+ *  \brief Calculates the left and right interface values for a cell using
+ linear reconstruction in the primitive variables with Van Leer or Minmod slope
+ limiting. */
 __device__ void Interface_Values_PLM(Real q_imo, Real q_i, Real q_ipo, Real *q_L, Real *q_R);
 
-
-#endif // PLMP_CUDA_H
-#endif // CUDA
+#endif  // PLMP_CUDA_H
diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu
index 2ca1b62df..4db993d70 100644
--- a/src/reconstruction/ppmc_cuda.cu
+++ b/src/reconstruction/ppmc_cuda.cu
@@ -1,1101 +1,696 @@
 /*! \file ppmc_cuda.cu
- *  \brief Functions definitions for the ppm kernels, using characteristic tracing.
-           Written following Stone et al. 2008. */
-#ifdef CUDA
-#ifdef PPMC
+ *  \brief Functions definitions for the ppm kernels, using characteristic
+ tracing. Written following Stone et al. 2008. */
 
-#include "../utils/gpu.hpp"
 #include <math.h>
+
 #include "../global/global.h"
 #include "../global/global_cuda.h"
 #include "../reconstruction/ppmc_cuda.h"
-
-#ifdef DE //PRESSURE_DE
+#include "../reconstruction/reconstruction.h"
+#include "../utils/gpu.hpp"
 #include "../utils/hydro_utilities.h"
-#endif
 
+#ifdef DE  // PRESSURE_DE
+  #include "../utils/hydro_utilities.h"
+#endif
 
-/*! \fn void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields)
- *  \brief When passed a stencil of conserved variables, returns the left and right
-           boundary values for the interface calculated using ppm. */
-__global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields)
+// =====================================================================================================================
+/*!
+ *  \brief When passed a stencil of conserved variables, returns the left and
+ right boundary values for the interface calculated using ppm. */
+__global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx,
+                         Real dt, Real gamma, int dir)
 {
-  int n_cells = nx*ny*nz;
-  int o1, o2, o3;
-  if (dir == 0 ) {
-    o1 = 1; o2 = 2; o3 = 3;
-  }
-  if (dir == 1 ) {
-    o1 = 2; o2 = 3; o3 = 1;
-  }
-  if (dir == 2 ) {
-    o1 = 3; o2 = 1; o3 = 2;
-  }
+  // get a thread ID
+  int const thread_id = threadIdx.x + blockIdx.x * blockDim.x;
+  int xid, yid, zid;
+  cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid);
 
-  // declare primitive variables for each stencil
-  // these will be placed into registers for each thread
-  Real d_i, vx_i, vy_i, vz_i, p_i;
-  Real d_imo, vx_imo, vy_imo, vz_imo, p_imo;
-  Real d_ipo, vx_ipo, vy_ipo, vz_ipo, p_ipo;
-  Real d_imt, vx_imt, vy_imt, vz_imt, p_imt;
-  Real d_ipt, vx_ipt, vy_ipt, vz_ipt, p_ipt;
-
-  // declare other variables to be used
-  Real a;
-  Real del_d_L, del_vx_L, del_vy_L, del_vz_L, del_p_L;
-  Real del_d_R, del_vx_R, del_vy_R, del_vz_R, del_p_R;
-  Real del_d_C, del_vx_C, del_vy_C, del_vz_C, del_p_C;
-  Real del_d_G, del_vx_G, del_vy_G, del_vz_G, del_p_G;
-  Real del_a_0_L, del_a_1_L, del_a_2_L, del_a_3_L, del_a_4_L;
-  Real del_a_0_R, del_a_1_R, del_a_2_R, del_a_3_R, del_a_4_R;
-  Real del_a_0_C, del_a_1_C, del_a_2_C, del_a_3_C, del_a_4_C;
-  Real del_a_0_G, del_a_1_G, del_a_2_G, del_a_3_G, del_a_4_G;
-  Real del_a_0_m, del_a_1_m, del_a_2_m, del_a_3_m, del_a_4_m;
-  Real lim_slope_a, lim_slope_b;
-  Real del_d_m_imo, del_vx_m_imo, del_vy_m_imo, del_vz_m_imo, del_p_m_imo;
-  Real del_d_m_i, del_vx_m_i, del_vy_m_i, del_vz_m_i, del_p_m_i;
-  Real del_d_m_ipo, del_vx_m_ipo, del_vy_m_ipo, del_vz_m_ipo, del_p_m_ipo;
-  Real d_L, vx_L, vy_L, vz_L, p_L;
-  Real d_R, vx_R, vy_R, vz_R, p_R;
-
-  // #ifdef CTU
-  #ifndef VL
-  Real dtodx = dt/dx;
-  Real d_6, vx_6, vy_6, vz_6, p_6;
-  Real lambda_m, lambda_0, lambda_p;
-  Real lambda_max, lambda_min;
-  Real A, B, C, D;
-  Real chi_1, chi_2, chi_3, chi_4, chi_5;
-  Real sum_1, sum_2, sum_3, sum_4, sum_5;
-  #endif //CTU
-
-  #ifdef DE
-  Real ge_i, ge_imo, ge_ipo, ge_imt, ge_ipt;
-  Real del_ge_L, del_ge_R, del_ge_C, del_ge_G;
-  Real del_ge_m_imo, del_ge_m_i, del_ge_m_ipo;
-  Real ge_L, ge_R;
-  Real  E_kin, E, dge;
-  // #ifdef CTU
-  #ifndef VL
-  Real chi_ge, sum_ge, ge_6;
-  #endif
-  #endif
-  #ifdef SCALAR
-  Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS], scalar_imt[NSCALARS], scalar_ipt[NSCALARS];
-  Real del_scalar_L[NSCALARS], del_scalar_R[NSCALARS], del_scalar_C[NSCALARS], del_scalar_G[NSCALARS];
-  Real del_scalar_m_imo[NSCALARS], del_scalar_m_i[NSCALARS], del_scalar_m_ipo[NSCALARS];
-  Real scalar_L[NSCALARS], scalar_R[NSCALARS];
-  // #ifdef CTU
-  #ifndef VL
-  Real chi_scalar[NSCALARS], sum_scalar[NSCALARS], scalar_6[NSCALARS];
-  #endif
-  #endif
+  if (reconstruction::Thread_Guard<3>(nx, ny, nz, xid, yid, zid)) {
+    return;
+  }
 
+  // Compute the total number of cells
+  int const n_cells = nx * ny * nz;
 
-  // get a thread ID
-  int blockId = blockIdx.x + blockIdx.y*gridDim.x;
-  int tid = threadIdx.x + blockId * blockDim.x;
-  int id;
-  int zid = tid / (nx*ny);
-  int yid = (tid - zid*nx*ny) / nx;
-  int xid = tid - zid*nx*ny - yid*nx;
-
-  int xs, xe, ys, ye, zs, ze;
-  if (dir == 0) {
-    xs = 2; xe = nx-3;
-    ys = 0; ye = ny;
-    zs = 0; ze = nz;
-  }
-  if (dir == 1) {
-    xs = 0; xe = nx;
-    ys = 2; ye = ny-3;
-    zs = 0; ze = nz;
-  }
-  if (dir == 2) {
-    xs = 0; xe = nx;
-    ys = 0; ye = ny;
-    zs = 2; ze = nz-3;
+  // Set the field indices for the various directions
+  int o1, o2, o3;
+  switch (dir) {
+    case 0:
+      o1 = grid_enum::momentum_x;
+      o2 = grid_enum::momentum_y;
+      o3 = grid_enum::momentum_z;
+      break;
+    case 1:
+      o1 = grid_enum::momentum_y;
+      o2 = grid_enum::momentum_z;
+      o3 = grid_enum::momentum_x;
+      break;
+    case 2:
+      o1 = grid_enum::momentum_z;
+      o2 = grid_enum::momentum_x;
+      o3 = grid_enum::momentum_y;
+      break;
   }
 
-  if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze)
-  {
-    // load the 5-cell stencil into registers
-    // cell i
-    id = xid + yid*nx + zid*nx*ny;
-    d_i  =  dev_conserved[            id];
-    vx_i =  dev_conserved[o1*n_cells + id] / d_i;
-    vy_i =  dev_conserved[o2*n_cells + id] / d_i;
-    vz_i =  dev_conserved[o3*n_cells + id] / d_i;
-    #ifdef DE //PRESSURE_DE
-    E = dev_conserved[4*n_cells + id];
-    E_kin = 0.5 * d_i * ( vx_i*vx_i + vy_i*vy_i + vz_i*vz_i );
-    dge = dev_conserved[(n_fields-1)*n_cells + id];
-    p_i = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma );
-    #else
-    p_i  = (dev_conserved[4*n_cells + id] - 0.5*d_i*(vx_i*vx_i + vy_i*vy_i + vz_i*vz_i)) * (gamma - 1.0);
-    #endif //PRESSURE_DE
-    p_i  = fmax(p_i, (Real) TINY_NUMBER);
-    #ifdef DE
-    ge_i =  dge / d_i;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_i[i] =  dev_conserved[(5+i)*n_cells + id] / d_i;
-    }
-    #endif
-    // cell i-1
-    if (dir == 0) id = xid-1 + yid*nx + zid*nx*ny;
-    if (dir == 1) id = xid + (yid-1)*nx + zid*nx*ny;
-    if (dir == 2) id = xid + yid*nx + (zid-1)*nx*ny;
-    d_imo  =  dev_conserved[            id];
-    vx_imo =  dev_conserved[o1*n_cells + id] / d_imo;
-    vy_imo =  dev_conserved[o2*n_cells + id] / d_imo;
-    vz_imo =  dev_conserved[o3*n_cells + id] / d_imo;
-    #ifdef DE //PRESSURE_DE
-    E = dev_conserved[4*n_cells + id];
-    E_kin = 0.5 * d_imo * ( vx_imo*vx_imo + vy_imo*vy_imo + vz_imo*vz_imo );
-    dge = dev_conserved[(n_fields-1)*n_cells + id];
-    p_imo = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma );
-    #else
-    p_imo  = (dev_conserved[4*n_cells + id] - 0.5*d_imo*(vx_imo*vx_imo + vy_imo*vy_imo + vz_imo*vz_imo)) * (gamma - 1.0);
-    #endif //PRESSURE_DE
-    p_imo  = fmax(p_imo, (Real) TINY_NUMBER);
-    #ifdef DE
-    ge_imo =  dge / d_imo;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_imo[i]  =  dev_conserved[(5+i)*n_cells + id] / d_imo;
-    }
-    #endif
-    // cell i+1
-    if (dir == 0) id = xid+1 + yid*nx + zid*nx*ny;
-    if (dir == 1) id = xid + (yid+1)*nx + zid*nx*ny;
-    if (dir == 2) id = xid + yid*nx + (zid+1)*nx*ny;
-    d_ipo  =  dev_conserved[            id];
-    vx_ipo =  dev_conserved[o1*n_cells + id] / d_ipo;
-    vy_ipo =  dev_conserved[o2*n_cells + id] / d_ipo;
-    vz_ipo =  dev_conserved[o3*n_cells + id] / d_ipo;
-    #ifdef DE //PRESSURE_DE
-    E = dev_conserved[4*n_cells + id];
-    E_kin = 0.5 * d_ipo * ( vx_ipo*vx_ipo + vy_ipo*vy_ipo + vz_ipo*vz_ipo );
-    dge = dev_conserved[(n_fields-1)*n_cells + id];
-    p_ipo = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma );
-    #else
-    p_ipo  = (dev_conserved[4*n_cells + id] - 0.5*d_ipo*(vx_ipo*vx_ipo + vy_ipo*vy_ipo + vz_ipo*vz_ipo)) * (gamma - 1.0);
-    #endif //PRESSURE_DE
-    p_ipo  = fmax(p_ipo, (Real) TINY_NUMBER);
-    #ifdef DE
-    ge_ipo =  dge / d_ipo;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_ipo[i]  =  dev_conserved[(5+i)*n_cells + id] / d_ipo;
-    }
-    #endif
-    // cell i-2
-    if (dir == 0) id = xid-2 + yid*nx + zid*nx*ny;
-    if (dir == 1) id = xid + (yid-2)*nx + zid*nx*ny;
-    if (dir == 2) id = xid + yid*nx + (zid-2)*nx*ny;
-    d_imt  =  dev_conserved[            id];
-    vx_imt =  dev_conserved[o1*n_cells + id] / d_imt;
-    vy_imt =  dev_conserved[o2*n_cells + id] / d_imt;
-    vz_imt =  dev_conserved[o3*n_cells + id] / d_imt;
-    #ifdef DE //PRESSURE_DE
-    E = dev_conserved[4*n_cells + id];
-    E_kin = 0.5 * d_imt * ( vx_imt*vx_imt + vy_imt*vy_imt + vz_imt*vz_imt );
-    dge = dev_conserved[(n_fields-1)*n_cells + id];
-    p_imt = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma );
-    #else
-    p_imt  = (dev_conserved[4*n_cells + id] - 0.5*d_imt*(vx_imt*vx_imt + vy_imt*vy_imt + vz_imt*vz_imt)) * (gamma - 1.0);
-    #endif //PRESSURE_DE
-    p_imt  = fmax(p_imt, (Real) TINY_NUMBER);
-    #ifdef DE
-    ge_imt =  dge / d_imt;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_imt[i]  =  dev_conserved[(5+i)*n_cells + id] / d_imt;
-    }
-    #endif
-    // cell i+2
-    if (dir == 0) id = xid+2 + yid*nx + zid*nx*ny;
-    if (dir == 1) id = xid + (yid+2)*nx + zid*nx*ny;
-    if (dir == 2) id = xid + yid*nx + (zid+2)*nx*ny;
-    d_ipt  =  dev_conserved[            id];
-    vx_ipt =  dev_conserved[o1*n_cells + id] / d_ipt;
-    vy_ipt =  dev_conserved[o2*n_cells + id] / d_ipt;
-    vz_ipt =  dev_conserved[o3*n_cells + id] / d_ipt;
-    #ifdef DE //PRESSURE_DE
-    E = dev_conserved[4*n_cells + id];
-    E_kin = 0.5 * d_ipt * ( vx_ipt*vx_ipt + vy_ipt*vy_ipt + vz_ipt*vz_ipt );
-    dge = dev_conserved[(n_fields-1)*n_cells + id];
-    p_ipt = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma );
-    #else
-    p_ipt  = (dev_conserved[4*n_cells + id] - 0.5*d_ipt*(vx_ipt*vx_ipt + vy_ipt*vy_ipt + vz_ipt*vz_ipt)) * (gamma - 1.0);
-    #endif //PRESSURE_DE
-    p_ipt  = fmax(p_ipt, (Real) TINY_NUMBER);
-    #ifdef DE
-    ge_ipt =  dge / d_ipt;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_ipt[i]  =  dev_conserved[(5+i)*n_cells + id] / d_ipt;
-    }
-    #endif
-
-    //printf("%d %d %d %f %f %f %f %f\n", xid, yid, zid, d_i, vx_i, vy_i, vz_i, p_i);
-
-    // Steps 2 - 5 are repeated for cell i-1, i, and i+1
-    // Step 2 - Compute the left, right, centered, and van Leer differences of the primitive variables
-    //          Note that here L and R refer to locations relative to the cell center
-    //          Stone Eqn 36
-
-    // calculate the adiabatic sound speed in cell imo
-    a = sqrt(gamma*p_imo/d_imo);
-
-    // left
-    del_d_L  = d_imo - d_imt;
-    del_vx_L = vx_imo - vx_imt;
-    del_vy_L = vy_imo - vy_imt;
-    del_vz_L = vz_imo - vz_imt;
-    del_p_L  = p_imo  - p_imt;
-
-    // right
-    del_d_R  = d_i  - d_imo;
-    del_vx_R = vx_i - vx_imo;
-    del_vy_R = vy_i - vy_imo;
-    del_vz_R = vz_i - vz_imo;
-    del_p_R  = p_i  - p_imo;
-
-    // centered
-    del_d_C  = 0.5*(d_i - d_imt);
-    del_vx_C = 0.5*(vx_i - vx_imt);
-    del_vy_C = 0.5*(vy_i - vy_imt);
-    del_vz_C = 0.5*(vz_i - vz_imt);
-    del_p_C  = 0.5*(p_i - p_imt);
-
-    // Van Leer
-    if (del_d_L*del_d_R > 0.0) { del_d_G = 2.0*del_d_L*del_d_R / (del_d_L+del_d_R); }
-    else { del_d_G = 0.0; }
-    if (del_vx_L*del_vx_R > 0.0) { del_vx_G = 2.0*del_vx_L*del_vx_R / (del_vx_L+del_vx_R); }
-    else { del_vx_G = 0.0; }
-    if (del_vy_L*del_vy_R > 0.0) { del_vy_G = 2.0*del_vy_L*del_vy_R / (del_vy_L+del_vy_R); }
-    else { del_vy_G = 0.0; }
-    if (del_vz_L*del_vz_R > 0.0) { del_vz_G = 2.0*del_vz_L*del_vz_R / (del_vz_L+del_vz_R); }
-    else { del_vz_G = 0.0; }
-    if (del_p_L*del_p_R > 0.0) { del_p_G = 2.0*del_p_L*del_p_R / (del_p_L+del_p_R); }
-    else { del_p_G = 0.0; }
-
-    #ifdef DE
-    del_ge_L  = ge_imo  - ge_imt;
-    del_ge_R  = ge_i  - ge_imo;
-    del_ge_C  = 0.5*(ge_i - ge_imt);
-    if (del_ge_L*del_ge_R > 0.0) { del_ge_G = 2.0*del_ge_L*del_ge_R / (del_ge_L+del_ge_R); }
-    else { del_ge_G = 0.0; }
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      del_scalar_L[i]  = scalar_imo[i]  - scalar_imt[i];
-      del_scalar_R[i]  = scalar_i[i]  - scalar_imo[i];
-      del_scalar_C[i]  = 0.5*(scalar_i[i] - scalar_imt[i]);
-      if (del_scalar_L[i]*del_scalar_R[i] > 0.0) { del_scalar_G[i] = 2.0*del_scalar_L[i]*del_scalar_R[i] / (del_scalar_L[i]+del_scalar_R[i]); }
-      else { del_scalar_G[i] = 0.0; }
-    }
-    #endif
+  // load the 5-cell stencil into registers
+  // cell i
+  reconstruction::Primitive const cell_i =
+      reconstruction::Load_Data(dev_conserved, xid, yid, zid, nx, ny, n_cells, o1, o2, o3, gamma);
 
+  // cell i-1. The equality checks check the direction and subtracts one from the direction
+  // im1 stands for "i minus 1"
+  reconstruction::Primitive const cell_im1 = reconstruction::Load_Data(
+      dev_conserved, xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma);
 
-    // Step 3 - Project the left, right, centered and van Leer differences onto the characteristic variables
-    //          Stone Eqn 37 (del_a are differences in characteristic variables, see Stone for notation)
-    //          Use the eigenvectors given in Stone 2008, Appendix A
+  // cell i+1. The equality checks check the direction and adds one to the direction
+  // ip1 stands for "i plus 1"
+  reconstruction::Primitive const cell_ip1 = reconstruction::Load_Data(
+      dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma);
 
-    del_a_0_L = -0.5*d_imo*del_vx_L/a + 0.5*del_p_L/(a*a);
-    del_a_1_L = del_d_L - del_p_L/(a*a);
-    del_a_2_L = del_vy_L;
-    del_a_3_L = del_vz_L;
-    del_a_4_L = 0.5*d_imo*del_vx_L/a + 0.5*del_p_L/(a*a);
+  // cell i-2. The equality checks check the direction and subtracts one from the direction
+  // im2 stands for "i minus 2"
+  reconstruction::Primitive const cell_im2 =
+      reconstruction::Load_Data(dev_conserved, xid - 2 * int(dir == 0), yid - 2 * int(dir == 1),
+                                zid - 2 * int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma);
 
-    del_a_0_R = -0.5*d_imo*del_vx_R/a + 0.5*del_p_R/(a*a);
-    del_a_1_R = del_d_R - del_p_R/(a*a);
-    del_a_2_R = del_vy_R;
-    del_a_3_R = del_vz_R;
-    del_a_4_R = 0.5*d_imo*del_vx_R/a + 0.5*del_p_R/(a*a);
+  // cell i+2. The equality checks check the direction and adds one to the direction
+  // ip2 stands for "i plus 2"
+  reconstruction::Primitive const cell_ip2 =
+      reconstruction::Load_Data(dev_conserved, xid + 2 * int(dir == 0), yid + 2 * int(dir == 1),
+                                zid + 2 * int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma);
 
-    del_a_0_C = -0.5*d_imo*del_vx_C/a + 0.5*del_p_C/(a*a);
-    del_a_1_C = del_d_C - del_p_C/(a*a);
-    del_a_2_C = del_vy_C;
-    del_a_3_C = del_vz_C;
-    del_a_4_C = 0.5*d_imo*del_vx_C/a + 0.5*del_p_C/(a*a);
+  // Steps 2 - 5 are repeated for cell i-1, i, and i+1
 
-    del_a_0_G = -0.5*d_imo*del_vx_G/a + 0.5*del_p_G/(a*a);
-    del_a_1_G = del_d_G - del_p_G/(a*a);
-    del_a_2_G = del_vy_G;
-    del_a_3_G = del_vz_G;
-    del_a_4_G = 0.5*d_imo*del_vx_G/a + 0.5*del_p_G/(a*a);
+  // ===============
+  // Cell i-1 slopes
+  // ===============
 
+  // calculate the adiabatic sound speed in cell im1
+  Real sound_speed = hydro_utilities::Calc_Sound_Speed(cell_im1.pressure, cell_im1.density, gamma);
+  // this isn't actually used and the compiler should optimize it away but since this is the only reconstruction
+  // function that won't use it it was easier to add it here as an unused variable
+  reconstruction::EigenVecs eigenvector;
 
-    // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables
-    //          Stone Eqn 38
+  // Step 2 - Compute the left, right, centered, and van Leer differences of the primitive variables. Note that here L
+  // and R refer to locations relative to the cell center Stone Eqn 36
 
-    del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0;
+  // left
+  reconstruction::Primitive del_L = reconstruction::Compute_Slope(cell_im2, cell_im1);
 
-    if (del_a_0_L*del_a_0_R > 0.0) {
-      lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R));
-      lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G));
-      del_a_0_m = sgn_CUDA(del_a_0_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b);
-    }
-    if (del_a_1_L*del_a_1_R > 0.0) {
-      lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R));
-      lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G));
-      del_a_1_m = sgn_CUDA(del_a_1_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b);
-    }
-    if (del_a_2_L*del_a_2_R > 0.0) {
-      lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R));
-      lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G));
-      del_a_2_m = sgn_CUDA(del_a_2_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b);
-    }
-    if (del_a_3_L*del_a_3_R > 0.0) {
-      lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R));
-      lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G));
-      del_a_3_m = sgn_CUDA(del_a_3_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b);
-    }
-    if (del_a_4_L*del_a_4_R > 0.0) {
-      lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R));
-      lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G));
-      del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b);
-    }
-    #ifdef DE
-    if (del_ge_L*del_ge_R > 0.0) {
-      lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R));
-      lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G));
-      del_ge_m_imo = sgn_CUDA(del_ge_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b);
-    }
-    else del_ge_m_imo = 0.0;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      if (del_scalar_L[i]*del_scalar_R[i] > 0.0) {
-        lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i]));
-        lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i]));
-        del_scalar_m_imo[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real) 2.0*lim_slope_a, lim_slope_b);
-      }
-      else del_scalar_m_imo[i] = 0.0;
-    }
-    #endif
-
-
-    // Step 5 - Project the monotonized difference in the characteristic variables back onto the
-    //          primitive variables
-    //          Stone Eqn 39
-
-    del_d_m_imo  = del_a_0_m + del_a_1_m + del_a_4_m;
-    del_vx_m_imo = -a*del_a_0_m/d_imo + a*del_a_4_m/d_imo;
-    del_vy_m_imo = del_a_2_m;
-    del_vz_m_imo = del_a_3_m;
-    del_p_m_imo  = a*a*del_a_0_m + a*a*del_a_4_m;
-
-
-    // Step 2 - Compute the left, right, centered, and van Leer differences of the primitive variables
-    //          Note that here L and R refer to locations relative to the cell center
-    //          Stone Eqn 36
-
-    // calculate the adiabatic sound speed in cell i
-    a = sqrt(gamma*p_i/d_i);
-
-    // left
-    del_d_L  = d_i  - d_imo;
-    del_vx_L = vx_i - vx_imo;
-    del_vy_L = vy_i - vy_imo;
-    del_vz_L = vz_i - vz_imo;
-    del_p_L  = p_i  - p_imo;
-
-    // right
-    del_d_R  = d_ipo  - d_i;
-    del_vx_R = vx_ipo - vx_i;
-    del_vy_R = vy_ipo - vy_i;
-    del_vz_R = vz_ipo - vz_i;
-    del_p_R  = p_ipo  - p_i;
-
-    // centered
-    del_d_C  = 0.5*(d_ipo - d_imo);
-    del_vx_C = 0.5*(vx_ipo - vx_imo);
-    del_vy_C = 0.5*(vy_ipo - vy_imo);
-    del_vz_C = 0.5*(vz_ipo - vz_imo);
-    del_p_C  = 0.5*(p_ipo - p_imo);
-
-    // van Leer
-    if (del_d_L*del_d_R > 0.0) { del_d_G = 2.0*del_d_L*del_d_R / (del_d_L+del_d_R); }
-    else { del_d_G = 0.0; }
-    if (del_vx_L*del_vx_R > 0.0) { del_vx_G = 2.0*del_vx_L*del_vx_R / (del_vx_L+del_vx_R); }
-    else { del_vx_G = 0.0; }
-    if (del_vy_L*del_vy_R > 0.0) { del_vy_G = 2.0*del_vy_L*del_vy_R / (del_vy_L+del_vy_R); }
-    else { del_vy_G = 0.0; }
-    if (del_vz_L*del_vz_R > 0.0) { del_vz_G = 2.0*del_vz_L*del_vz_R / (del_vz_L+del_vz_R); }
-    else { del_vz_G = 0.0; }
-    if (del_p_L*del_p_R > 0.0) { del_p_G = 2.0*del_p_L*del_p_R / (del_p_L+del_p_R); }
-    else { del_p_G = 0.0; }
-
-    #ifdef DE
-    del_ge_L = ge_i - ge_imo;
-    del_ge_R = ge_ipo - ge_i;
-    del_ge_C = 0.5*(ge_ipo - ge_imo);
-    if (del_ge_L*del_ge_R > 0.0) { del_ge_G = 2.0*del_ge_L*del_ge_R / (del_ge_L+del_ge_R); }
-    else { del_ge_G = 0.0; }
-    #endif
-
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      del_scalar_L[i] = scalar_i[i] - scalar_imo[i];
-      del_scalar_R[i] = scalar_ipo[i] - scalar_i[i];
-      del_scalar_C[i] = 0.5*(scalar_ipo[i] - scalar_imo[i]);
-      if (del_scalar_L[i]*del_scalar_R[i] > 0.0) { del_scalar_G[i] = 2.0*del_scalar_L[i]*del_scalar_R[i] / (del_scalar_L[i]+del_scalar_R[i]); }
-      else { del_scalar_G[i] = 0.0; }
-    }
-    #endif
-
-    // Step 3 - Project the left, right, centered, and van Leer differences onto the characteristic variables
-    //          Stone Eqn 37 (del_a are differences in characteristic variables, see Stone for notation)
-    //          Use the eigenvectors given in Stone 2008, Appendix A
-
-    del_a_0_L = -0.5*d_i*del_vx_L/a + 0.5*del_p_L/(a*a);
-    del_a_1_L = del_d_L - del_p_L/(a*a);
-    del_a_2_L = del_vy_L;
-    del_a_3_L = del_vz_L;
-    del_a_4_L = 0.5*d_i*del_vx_L/a + 0.5*del_p_L/(a*a);
-
-    del_a_0_R = -0.5*d_i*del_vx_R/a + 0.5*del_p_R/(a*a);
-    del_a_1_R = del_d_R - del_p_R/(a*a);
-    del_a_2_R = del_vy_R;
-    del_a_3_R = del_vz_R;
-    del_a_4_R = 0.5*d_i*del_vx_R/a + 0.5*del_p_R/(a*a);
-
-    del_a_0_C = -0.5*d_i*del_vx_C/a + 0.5*del_p_C/(a*a);
-    del_a_1_C = del_d_C - del_p_C/(a*a);
-    del_a_2_C = del_vy_C;
-    del_a_3_C = del_vz_C;
-    del_a_4_C = 0.5*d_i*del_vx_C/a + 0.5*del_p_C/(a*a);
-
-    del_a_0_G = -0.5*d_i*del_vx_G/a + 0.5*del_p_G/(a*a);
-    del_a_1_G = del_d_G - del_p_G/(a*a);
-    del_a_2_G = del_vy_G;
-    del_a_3_G = del_vz_G;
-    del_a_4_G = 0.5*d_i*del_vx_G/a + 0.5*del_p_G/(a*a);
-
-
-    // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables
-    //          Stone Eqn 38
-
-    del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0;
-
-    if (del_a_0_L*del_a_0_R > 0.0) {
-      lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R));
-      lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G));
-      del_a_0_m = sgn_CUDA(del_a_0_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b);
-    }
-    if (del_a_1_L*del_a_1_R > 0.0) {
-      lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R));
-      lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G));
-      del_a_1_m = sgn_CUDA(del_a_1_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b);
-    }
-    if (del_a_2_L*del_a_2_R > 0.0) {
-      lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R));
-      lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G));
-      del_a_2_m = sgn_CUDA(del_a_2_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b);
-    }
-    if (del_a_3_L*del_a_3_R > 0.0) {
-      lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R));
-      lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G));
-      del_a_3_m = sgn_CUDA(del_a_3_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b);
-    }
-    if (del_a_4_L*del_a_4_R > 0.0) {
-      lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R));
-      lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G));
-      del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b);
-    }
-    #ifdef DE
-    if (del_ge_L*del_ge_R > 0.0) {
-      lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R));
-      lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G));
-      del_ge_m_i = sgn_CUDA(del_ge_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b);
-    }
-    else del_ge_m_i = 0.0;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      if (del_scalar_L[i]*del_scalar_R[i] > 0.0) {
-        lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i]));
-        lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i]));
-        del_scalar_m_i[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real) 2.0*lim_slope_a, lim_slope_b);
-      }
-      else del_scalar_m_i[i] = 0.0;
-    }
-    #endif
-
-
-    // Step 5 - Project the monotonized difference in the characteristic variables back onto the
-    //          primitive variables
-    //          Stone Eqn 39
-
-    del_d_m_i  = del_a_0_m + del_a_1_m + del_a_4_m;
-    del_vx_m_i = -a*del_a_0_m/d_i + a*del_a_4_m/d_i;
-    del_vy_m_i = del_a_2_m;
-    del_vz_m_i = del_a_3_m;
-    del_p_m_i  = a*a*del_a_0_m + a*a*del_a_4_m;
-
-
-    // Step 2 - Compute the left, right, centered, and van Leer differences of the primitive variables
-    //          Note that here L and R refer to locations relative to the cell center
-    //          Stone Eqn 36
-
-
-    // calculate the adiabatic sound speed in cell ipo
-    a = sqrt(gamma*p_ipo/d_ipo);
-
-    // left
-    del_d_L  = d_ipo - d_i;
-    del_vx_L = vx_ipo - vx_i;
-    del_vy_L = vy_ipo - vy_i;
-    del_vz_L = vz_ipo - vz_i;
-    del_p_L  = p_ipo  - p_i;
-
-    // right
-    del_d_R  = d_ipt  - d_ipo;
-    del_vx_R = vx_ipt - vx_ipo;
-    del_vy_R = vy_ipt - vy_ipo;
-    del_vz_R = vz_ipt - vz_ipo;
-    del_p_R  = p_ipt  - p_ipo;
-
-    // centered
-    del_d_C  = 0.5*(d_ipt - d_i);
-    del_vx_C = 0.5*(vx_ipt- vx_i);
-    del_vy_C = 0.5*(vy_ipt - vy_i);
-    del_vz_C = 0.5*(vz_ipt - vz_i);
-    del_p_C  = 0.5*(p_ipt - p_i);
-
-    // van Leer
-    if (del_d_L*del_d_R > 0.0) { del_d_G = 2.0*del_d_L*del_d_R / (del_d_L+del_d_R); }
-    else { del_d_G = 0.0; }
-    if (del_vx_L*del_vx_R > 0.0) { del_vx_G = 2.0*del_vx_L*del_vx_R / (del_vx_L+del_vx_R); }
-    else { del_vx_G = 0.0; }
-    if (del_vy_L*del_vy_R > 0.0) { del_vy_G = 2.0*del_vy_L*del_vy_R / (del_vy_L+del_vy_R); }
-    else { del_vy_G = 0.0; }
-    if (del_vz_L*del_vz_R > 0.0) { del_vz_G = 2.0*del_vz_L*del_vz_R / (del_vz_L+del_vz_R); }
-    else { del_vz_G = 0.0; }
-    if (del_p_L*del_p_R > 0.0) { del_p_G = 2.0*del_p_L*del_p_R / (del_p_L+del_p_R); }
-    else { del_p_G = 0.0; }
-
-    #ifdef DE
-    del_ge_L = ge_ipo - ge_i;
-    del_ge_R = ge_ipt - ge_ipo;
-    del_ge_C = 0.5*(ge_ipt- ge_i);
-    if (del_ge_L*del_ge_R > 0.0) { del_ge_G = 2.0*del_ge_L*del_ge_R / (del_ge_L+del_ge_R); }
-    else { del_ge_G = 0.0; }
-    #endif
-
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      del_scalar_L[i] = scalar_ipo[i] - scalar_i[i];
-      del_scalar_R[i] = scalar_ipt[i] - scalar_ipo[i];
-      del_scalar_C[i] = 0.5*(scalar_ipt[i]- scalar_i[i]);
-      if (del_scalar_L[i]*del_scalar_R[i] > 0.0) { del_scalar_G[i] = 2.0*del_scalar_L[i]*del_scalar_R[i] / (del_scalar_L[i]+del_scalar_R[i]); }
-      else { del_scalar_G[i] = 0.0; }
-    }
-    #endif
+  // right
+  reconstruction::Primitive del_R = reconstruction::Compute_Slope(cell_im1, cell_i);
 
+  // centered
+  reconstruction::Primitive del_C = reconstruction::Compute_Slope(cell_im2, cell_i, 0.5);
 
-    // Step 3 - Project the left, right, centered, and van Leer differences onto the characteristic variables
-    //          Stone Eqn 37 (del_a are differences in characteristic variables, see Stone for notation)
-    //          Use the eigenvectors given in Stone 2008, Appendix A
+  // Van Leer
+  reconstruction::Primitive del_G = reconstruction::Van_Leer_Slope(del_L, del_R);
 
-    del_a_0_L = -0.5*d_ipo*del_vx_L/a + 0.5*del_p_L/(a*a);
-    del_a_1_L = del_d_L - del_p_L/(a*a);
-    del_a_2_L = del_vy_L;
-    del_a_3_L = del_vz_L;
-    del_a_4_L = 0.5*d_ipo*del_vx_L/a + 0.5*del_p_L/(a*a);
+  // Step 3 - Project the left, right, centered and van Leer differences onto the
+  // characteristic variables Stone Eqn 37 (del_a are differences in
+  // characteristic variables, see Stone for notation) Use the eigenvectors
+  // given in Stone 2008, Appendix A
+  reconstruction::Characteristic del_a_L = reconstruction::Primitive_To_Characteristic(
+      cell_im1, del_L, eigenvector, sound_speed, sound_speed * sound_speed, gamma);
 
-    del_a_0_R = -0.5*d_ipo*del_vx_R/a + 0.5*del_p_R/(a*a);
-    del_a_1_R = del_d_R - del_p_R/(a*a);
-    del_a_2_R = del_vy_R;
-    del_a_3_R = del_vz_R;
-    del_a_4_R = 0.5*d_ipo*del_vx_R/a + 0.5*del_p_R/(a*a);
+  reconstruction::Characteristic del_a_R = reconstruction::Primitive_To_Characteristic(
+      cell_im1, del_R, eigenvector, sound_speed, sound_speed * sound_speed, gamma);
 
-    del_a_0_C = -0.5*d_ipo*del_vx_C/a + 0.5*del_p_C/(a*a);
-    del_a_1_C = del_d_C - del_p_C/(a*a);
-    del_a_2_C = del_vy_C;
-    del_a_3_C = del_vz_C;
-    del_a_4_C = 0.5*d_ipo*del_vx_C/a + 0.5*del_p_C/(a*a);
+  reconstruction::Characteristic del_a_C = reconstruction::Primitive_To_Characteristic(
+      cell_im1, del_C, eigenvector, sound_speed, sound_speed * sound_speed, gamma);
 
-    del_a_0_G = -0.5*d_ipo*del_vx_G/a + 0.5*del_p_G/(a*a);
-    del_a_1_G = del_d_G - del_p_G/(a*a);
-    del_a_2_G = del_vy_G;
-    del_a_3_G = del_vz_G;
-    del_a_4_G = 0.5*d_ipo*del_vx_G/a + 0.5*del_p_G/(a*a);
+  reconstruction::Characteristic del_a_G = reconstruction::Primitive_To_Characteristic(
+      cell_im1, del_G, eigenvector, sound_speed, sound_speed * sound_speed, gamma);
 
+  // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables
+  // Step 5 - and project the monotonized difference in the characteristic variables back onto the primitive variables
+  // Stone Eqn 39
+  reconstruction::Primitive const del_m_im1 = reconstruction::Monotonize_Characteristic_Return_Primitive(
+      cell_im1, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, eigenvector, sound_speed,
+      sound_speed * sound_speed, gamma);
 
-    // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables
-    //          Stone Eqn 38
+  // =============
+  // Cell i slopes
+  // =============
 
-    del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0;
+  // calculate the adiabatic sound speed in cell i
+  sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma);
 
-    if (del_a_0_L*del_a_0_R > 0.0) {
-      lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R));
-      lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G));
-      del_a_0_m = sgn_CUDA(del_a_0_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b);
-    }
-    if (del_a_1_L*del_a_1_R > 0.0) {
-      lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R));
-      lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G));
-      del_a_1_m = sgn_CUDA(del_a_1_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b);
-    }
-    if (del_a_2_L*del_a_2_R > 0.0) {
-      lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R));
-      lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G));
-      del_a_2_m = sgn_CUDA(del_a_2_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b);
-    }
-    if (del_a_3_L*del_a_3_R > 0.0) {
-      lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R));
-      lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G));
-      del_a_3_m = sgn_CUDA(del_a_3_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b);
-    }
-    if (del_a_4_L*del_a_4_R > 0.0) {
-      lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R));
-      lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G));
-      del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b);
-    }
-    #ifdef DE
-    if (del_ge_L*del_ge_R > 0.0) {
-      lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R));
-      lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G));
-      del_ge_m_ipo = sgn_CUDA(del_ge_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b);
-    }
-    else del_ge_m_ipo = 0.0;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      if (del_scalar_L[i]*del_scalar_R[i] > 0.0) {
-        lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i]));
-        lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i]));
-        del_scalar_m_ipo[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real) 2.0*lim_slope_a, lim_slope_b);
-      }
-      else del_scalar_m_ipo[i] = 0.0;
-    }
-    #endif
-
-
-    // Step 5 - Project the monotonized difference in the characteristic variables back onto the
-    //          primitive variables
-    //          Stone Eqn 39
-
-    del_d_m_ipo  = del_a_0_m + del_a_1_m + del_a_4_m;
-    del_vx_m_ipo = -a*del_a_0_m / d_ipo + a* del_a_4_m / d_ipo;
-    del_vy_m_ipo = del_a_2_m;
-    del_vz_m_ipo = del_a_3_m;
-    del_p_m_ipo  = a*a*del_a_0_m + a*a*del_a_4_m;
-
-
-    // Step 6 - Use parabolic interpolation to compute values at the left and right of each cell center
-    //          Here, the subscripts L and R refer to the left and right side of the ith cell center
-    //          Stone Eqn 46
-
-    d_L  = 0.5*(d_i + d_imo)   - (del_d_m_i  - del_d_m_imo)  / 6.0;
-    vx_L = 0.5*(vx_i + vx_imo) - (del_vx_m_i - del_vx_m_imo) / 6.0;
-    vy_L = 0.5*(vy_i + vy_imo) - (del_vy_m_i - del_vy_m_imo) / 6.0;
-    vz_L = 0.5*(vz_i + vz_imo) - (del_vz_m_i - del_vz_m_imo) / 6.0;
-    p_L  = 0.5*(p_i + p_imo)   - (del_p_m_i  - del_p_m_imo)  / 6.0;
-
-    d_R  = 0.5*(d_ipo + d_i)   - (del_d_m_ipo  - del_d_m_i)  / 6.0;
-    vx_R = 0.5*(vx_ipo + vx_i) - (del_vx_m_ipo - del_vx_m_i) / 6.0;
-    vy_R = 0.5*(vy_ipo + vy_i) - (del_vy_m_ipo - del_vy_m_i) / 6.0;
-    vz_R = 0.5*(vz_ipo + vz_i) - (del_vz_m_ipo - del_vz_m_i) / 6.0;
-    p_R  = 0.5*(p_ipo + p_i)   - (del_p_m_ipo  - del_p_m_i)  / 6.0;
-
-    #ifdef DE
-    ge_L  = 0.5*(ge_i + ge_imo)   - (del_ge_m_i  - del_ge_m_imo)  / 6.0;
-    ge_R  = 0.5*(ge_ipo + ge_i)   - (del_ge_m_ipo  - del_ge_m_i)  / 6.0;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_L[i]  = 0.5*(scalar_i[i] + scalar_imo[i])   - (del_scalar_m_i[i]  - del_scalar_m_imo[i])  / 6.0;
-      scalar_R[i]  = 0.5*(scalar_ipo[i] + scalar_i[i])   - (del_scalar_m_ipo[i]  - del_scalar_m_i[i])  / 6.0;
-    }
-    #endif
-
-
-    // Step 7 - Apply further monotonicity constraints to ensure the values on the left and right side
-    //          of cell center lie between neighboring cell-centered values
-    //          Stone Eqns 47 - 53
-
-    if ((d_R  - d_i) *(d_i  - d_L)  <= 0) d_L  = d_R  = d_i;
-    if ((vx_R - vx_i)*(vx_i - vx_L) <= 0) vx_L = vx_R = vx_i;
-    if ((vy_R - vy_i)*(vy_i - vy_L) <= 0) vy_L = vy_R = vy_i;
-    if ((vz_R - vz_i)*(vz_i - vz_L) <= 0) vz_L = vz_R = vz_i;
-    if ((p_R  - p_i) *(p_i  - p_L)  <= 0) p_L  = p_R  = p_i;
-
-    if ( 6.0*(d_R  - d_L) *(d_i  - 0.5*(d_L  + d_R))  > (d_R  - d_L) *(d_R  - d_L))  d_L  = 3.0*d_i  - 2.0*d_R;
-    if ( 6.0*(vx_R - vx_L)*(vx_i - 0.5*(vx_L + vx_R)) > (vx_R - vx_L)*(vx_R - vx_L)) vx_L = 3.0*vx_i - 2.0*vx_R;
-    if ( 6.0*(vy_R - vy_L)*(vy_i - 0.5*(vy_L + vy_R)) > (vy_R - vy_L)*(vy_R - vy_L)) vy_L = 3.0*vy_i - 2.0*vy_R;
-    if ( 6.0*(vz_R - vz_L)*(vz_i - 0.5*(vz_L + vz_R)) > (vz_R - vz_L)*(vz_R - vz_L)) vz_L = 3.0*vz_i - 2.0*vz_R;
-    if ( 6.0*(p_R  - p_L) *(p_i  - 0.5*(p_L  + p_R))  > (p_R  - p_L) *(p_R  - p_L))  p_L  = 3.0*p_i  - 2.0*p_R;
-
-    if ( 6.0*(d_R  - d_L) *(d_i  - 0.5*(d_L  + d_R))  < -(d_R  - d_L) *(d_R  - d_L))  d_R  = 3.0*d_i  - 2.0*d_L;
-    if ( 6.0*(vx_R - vx_L)*(vx_i - 0.5*(vx_L + vx_R)) < -(vx_R - vx_L)*(vx_R - vx_L)) vx_R = 3.0*vx_i - 2.0*vx_L;
-    if ( 6.0*(vy_R - vy_L)*(vy_i - 0.5*(vy_L + vy_R)) < -(vy_R - vy_L)*(vy_R - vy_L)) vy_R = 3.0*vy_i - 2.0*vy_L;
-    if ( 6.0*(vz_R - vz_L)*(vz_i - 0.5*(vz_L + vz_R)) < -(vz_R - vz_L)*(vz_R - vz_L)) vz_R = 3.0*vz_i - 2.0*vz_L;
-    if ( 6.0*(p_R  - p_L) *(p_i  - 0.5*(p_L  + p_R))  < -(p_R  - p_L) *(p_R  - p_L))  p_R  = 3.0*p_i  - 2.0*p_L;
-
-    d_L  = fmax( fmin(d_i,  d_imo), d_L );
-    d_L  = fmin( fmax(d_i,  d_imo), d_L );
-    d_R  = fmax( fmin(d_i,  d_ipo), d_R );
-    d_R  = fmin( fmax(d_i,  d_ipo), d_R );
-    vx_L = fmax( fmin(vx_i, vx_imo), vx_L );
-    vx_L = fmin( fmax(vx_i, vx_imo), vx_L );
-    vx_R = fmax( fmin(vx_i, vx_ipo), vx_R );
-    vx_R = fmin( fmax(vx_i, vx_ipo), vx_R );
-    vy_L = fmax( fmin(vy_i, vy_imo), vy_L );
-    vy_L = fmin( fmax(vy_i, vy_imo), vy_L );
-    vy_R = fmax( fmin(vy_i, vy_ipo), vy_R );
-    vy_R = fmin( fmax(vy_i, vy_ipo), vy_R );
-    vz_L = fmax( fmin(vz_i, vz_imo), vz_L );
-    vz_L = fmin( fmax(vz_i, vz_imo), vz_L );
-    vz_R = fmax( fmin(vz_i, vz_ipo), vz_R );
-    vz_R = fmin( fmax(vz_i, vz_ipo), vz_R );
-    p_L  = fmax( fmin(p_i,  p_imo), p_L );
-    p_L  = fmin( fmax(p_i,  p_imo), p_L );
-    p_R  = fmax( fmin(p_i,  p_ipo), p_R );
-    p_R  = fmin( fmax(p_i,  p_ipo), p_R );
-
-    #ifdef DE
-    if ((ge_R  - ge_i) *(ge_i  - ge_L)  <= 0) ge_L  = ge_R  = ge_i;
-    if ( 6.0*(ge_R  - ge_L) *(ge_i  - 0.5*(ge_L  + ge_R))  > (ge_R  - ge_L) *(ge_R  - ge_L))  ge_L  = 3.0*ge_i  - 2.0*ge_R;
-    if ( 6.0*(ge_R  - ge_L) *(ge_i  - 0.5*(ge_L  + ge_R))  < -(ge_R  - ge_L) *(ge_R  - ge_L))  ge_R  = 3.0*ge_i  - 2.0*ge_L;
-    ge_L  = fmax( fmin(ge_i,  ge_imo), ge_L );
-    ge_L  = fmin( fmax(ge_i,  ge_imo), ge_L );
-    ge_R  = fmax( fmin(ge_i,  ge_ipo), ge_R );
-    ge_R  = fmin( fmax(ge_i,  ge_ipo), ge_R );
-    #endif
-
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      if ((scalar_R[i]  - scalar_i[i]) *(scalar_i[i]  - scalar_L[i])  <= 0) scalar_L[i]  = scalar_R[i]  = scalar_i[i];
-      if ( 6.0*(scalar_R[i]  - scalar_L[i]) *(scalar_i[i]  - 0.5*(scalar_L[i]  + scalar_R[i]))  > (scalar_R[i]  - scalar_L[i]) *(scalar_R[i]  - scalar_L[i]))  scalar_L[i]  = 3.0*scalar_i[i]  - 2.0*scalar_R[i];
-      if ( 6.0*(scalar_R[i]  - scalar_L[i]) *(scalar_i[i]  - 0.5*(scalar_L[i]  + scalar_R[i]))  < -(scalar_R[i]  - scalar_L[i]) *(scalar_R[i]  - scalar_L[i]))  scalar_R[i]  = 3.0*scalar_i[i]  - 2.0*scalar_L[i];
-      scalar_L[i]  = fmax( fmin(scalar_i[i],  scalar_imo[i]), scalar_L[i] );
-      scalar_L[i]  = fmin( fmax(scalar_i[i],  scalar_imo[i]), scalar_L[i] );
-      scalar_R[i]  = fmax( fmin(scalar_i[i],  scalar_ipo[i]), scalar_R[i] );
-      scalar_R[i]  = fmin( fmax(scalar_i[i],  scalar_ipo[i]), scalar_R[i] );
-    }
-    #endif
-
-    // #ifdef CTU
-    #ifndef VL
-
-    // Step 8 - Compute the coefficients for the monotonized parabolic interpolation function
-    //          Stone Eqn 54
-
-    del_d_m_i  = d_R  - d_L;
-    del_vx_m_i = vx_R - vx_L;
-    del_vy_m_i = vy_R - vy_L;
-    del_vz_m_i = vz_R - vz_L;
-    del_p_m_i  = p_R  - p_L;
-
-    d_6  = 6.0*(d_i  - 0.5*(d_L  + d_R));
-    vx_6 = 6.0*(vx_i - 0.5*(vx_L + vx_R));
-    vy_6 = 6.0*(vy_i - 0.5*(vy_L + vy_R));
-    vz_6 = 6.0*(vz_i - 0.5*(vz_L + vz_R));
-    p_6  = 6.0*(p_i  - 0.5*(p_L  + p_R));
-
-    #ifdef DE
-    del_ge_m_i = ge_R - ge_L;
-    ge_6 = 6.0*(ge_i - 0.5*(ge_L + ge_R));
-    #endif
-
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      del_scalar_m_i[i] = scalar_R[i] - scalar_L[i];
-      scalar_6[i] = 6.0*(scalar_i[i] - 0.5*(scalar_L[i] + scalar_R[i]));
-    }
-    #endif
+  // Step 2 - Compute the left, right, centered, and van Leer differences of the primitive variables. Note that here L
+  // and R refer to locations relative to the cell center Stone Eqn 36
 
+  // left
+  del_L = reconstruction::Compute_Slope(cell_im1, cell_i);
 
-    // Compute the eigenvalues of the linearized equations in the
-    // primitive variables using the cell-centered primitive variables
+  // right
+  del_R = reconstruction::Compute_Slope(cell_i, cell_ip1);
 
-    // recalculate the adiabatic sound speed in cell i
-    a = sqrt(gamma*p_i/d_i);
+  // centered
+  del_C = reconstruction::Compute_Slope(cell_im1, cell_ip1, 0.5);
 
-    lambda_m = vx_i-a;
-    lambda_0 = vx_i;
-    lambda_p = vx_i+a;
+  // Van Leer
+  del_G = reconstruction::Van_Leer_Slope(del_L, del_R);
+
+  // Step 3 - Project the left, right, centered and van Leer differences onto the
+  // characteristic variables Stone Eqn 37 (del_a are differences in
+  // characteristic variables, see Stone for notation) Use the eigenvectors
+  // given in Stone 2008, Appendix A
+  del_a_L = reconstruction::Primitive_To_Characteristic(cell_i, del_L, eigenvector, sound_speed,
+                                                        sound_speed * sound_speed, gamma);
+
+  del_a_R = reconstruction::Primitive_To_Characteristic(cell_i, del_R, eigenvector, sound_speed,
+                                                        sound_speed * sound_speed, gamma);
+
+  del_a_C = reconstruction::Primitive_To_Characteristic(cell_i, del_C, eigenvector, sound_speed,
+                                                        sound_speed * sound_speed, gamma);
+
+  del_a_G = reconstruction::Primitive_To_Characteristic(cell_i, del_G, eigenvector, sound_speed,
+                                                        sound_speed * sound_speed, gamma);
+
+  // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables
+  // Step 5 - and project the monotonized difference in the characteristic variables back onto the primitive variables
+  // Stone Eqn 39
+  reconstruction::Primitive del_m_i = reconstruction::Monotonize_Characteristic_Return_Primitive(
+      cell_i, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, eigenvector, sound_speed,
+      sound_speed * sound_speed, gamma);
+
+  // ===============
+  // Cell i+1 slopes
+  // ===============
 
+  // calculate the adiabatic sound speed in cell ipo
+  sound_speed = hydro_utilities::Calc_Sound_Speed(cell_ip1.pressure, cell_ip1.density, gamma);
 
-    // Step 9 - Compute the left and right interface values using monotonized parabolic interpolation
-    //          Stone Eqns 55 & 56
+  // Step 2 - Compute the left, right, centered, and van Leer differences of the primitive variables. Note that here L
+  // and R refer to locations relative to the cell center Stone Eqn 36
 
-    // largest eigenvalue
-    lambda_max = fmax(lambda_p, (Real) 0);
-    // smallest eigenvalue
-    lambda_min = fmin(lambda_m, (Real) 0);
+  // left
+  del_L = reconstruction::Compute_Slope(cell_i, cell_ip1);
+
+  // right
+  del_R = reconstruction::Compute_Slope(cell_ip1, cell_ip2);
 
+  // centered
+  del_C = reconstruction::Compute_Slope(cell_i, cell_ip2, 0.5);
 
-    // left interface value, i+1/2
-    d_R  = d_R  - lambda_max * (0.5*dtodx)*(del_d_m_i  - (1.0 - (2.0/3.0)*lambda_max*dtodx)*d_6);
-    vx_R = vx_R - lambda_max * (0.5*dtodx)*(del_vx_m_i - (1.0 - (2.0/3.0)*lambda_max*dtodx)*vx_6);
-    vy_R = vy_R - lambda_max * (0.5*dtodx)*(del_vy_m_i - (1.0 - (2.0/3.0)*lambda_max*dtodx)*vy_6);
-    vz_R = vz_R - lambda_max * (0.5*dtodx)*(del_vz_m_i - (1.0 - (2.0/3.0)*lambda_max*dtodx)*vz_6);
-    p_R  = p_R  - lambda_max * (0.5*dtodx)*(del_p_m_i  - (1.0 - (2.0/3.0)*lambda_max*dtodx)*p_6);
+  // Van Leer
+  del_G = reconstruction::Van_Leer_Slope(del_L, del_R);
 
-    // right interface value, i-1/2
-    d_L  = d_L  - lambda_min * (0.5*dtodx)*(del_d_m_i  + (1.0 + (2.0/3.0)*lambda_min*dtodx)*d_6);
-    vx_L = vx_L - lambda_min * (0.5*dtodx)*(del_vx_m_i + (1.0 + (2.0/3.0)*lambda_min*dtodx)*vx_6);
-    vy_L = vy_L - lambda_min * (0.5*dtodx)*(del_vy_m_i + (1.0 + (2.0/3.0)*lambda_min*dtodx)*vy_6);
-    vz_L = vz_L - lambda_min * (0.5*dtodx)*(del_vz_m_i + (1.0 + (2.0/3.0)*lambda_min*dtodx)*vz_6);
-    p_L  = p_L  - lambda_min * (0.5*dtodx)*(del_p_m_i  + (1.0 + (2.0/3.0)*lambda_min*dtodx)*p_6);
+  // Step 3 - Project the left, right, centered and van Leer differences onto the
+  // characteristic variables Stone Eqn 37 (del_a are differences in
+  // characteristic variables, see Stone for notation) Use the eigenvectors
+  // given in Stone 2008, Appendix A
+  del_a_L = reconstruction::Primitive_To_Characteristic(cell_ip1, del_L, eigenvector, sound_speed,
+                                                        sound_speed * sound_speed, gamma);
+
+  del_a_R = reconstruction::Primitive_To_Characteristic(cell_ip1, del_R, eigenvector, sound_speed,
+                                                        sound_speed * sound_speed, gamma);
+
+  del_a_C = reconstruction::Primitive_To_Characteristic(cell_ip1, del_C, eigenvector, sound_speed,
+                                                        sound_speed * sound_speed, gamma);
+
+  del_a_G = reconstruction::Primitive_To_Characteristic(cell_ip1, del_G, eigenvector, sound_speed,
+                                                        sound_speed * sound_speed, gamma);
 
-    #ifdef DE
-    ge_R = ge_R - lambda_max * (0.5*dtodx)*(del_ge_m_i - (1.0 - (2.0/3.0)*lambda_max*dtodx)*ge_6);
-    ge_L = ge_L - lambda_min * (0.5*dtodx)*(del_ge_m_i + (1.0 + (2.0/3.0)*lambda_min*dtodx)*ge_6);
-    #endif
+  // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables
+  // Step 5 - and project the monotonized difference in the characteristic variables back onto the primitive variables
+  // Stone Eqn 39
+  reconstruction::Primitive const del_m_ip1 = reconstruction::Monotonize_Characteristic_Return_Primitive(
+      cell_ip1, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, eigenvector, sound_speed,
+      sound_speed * sound_speed, gamma);
+
+  // Step 6 - Use parabolic interpolation to compute values at the left and right of each cell center Here, the
+  // subscripts L and R refer to the left and right side of the ith cell center Stone Eqn 46
+  reconstruction::Primitive interface_L_iph =
+      reconstruction::Calc_Interface_Parabolic(cell_ip1, cell_i, del_m_ip1, del_m_i);
+
+  reconstruction::Primitive interface_R_imh =
+      reconstruction::Calc_Interface_Parabolic(cell_i, cell_im1, del_m_i, del_m_im1);
+
+  // Step 7 - Apply further monotonicity constraints to ensure the values on the left and right side of cell center lie
+  // between neighboring cell-centered values Stone Eqns 47 - 53
+  reconstruction::Monotonize_Parabolic_Interface(cell_i, cell_im1, cell_ip1, interface_L_iph, interface_R_imh);
+
+  // This is the beginning of the characteristic tracing
+  // Step 8 - Compute the coefficients for the monotonized parabolic
+  // interpolation function
+  //          Stone Eqn 54
 
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_R[i] = scalar_R[i] - lambda_max * (0.5*dtodx)*(del_scalar_m_i[i] - (1.0 - (2.0/3.0)*lambda_max*dtodx)*scalar_6[i]);
-      scalar_L[i] = scalar_L[i] - lambda_min * (0.5*dtodx)*(del_scalar_m_i[i] + (1.0 + (2.0/3.0)*lambda_min*dtodx)*scalar_6[i]);
-    }
-    #endif
-
-    // Step 10 - Perform the characteristic tracing
-    //           Stone Eqns 57 - 60
-
-    // left-hand interface value, i+1/2
-    sum_1 = 0;
-    sum_2 = 0;
-    sum_3 = 0;
-    sum_4 = 0;
-    sum_5 = 0;
-    #ifdef DE
-    sum_ge = 0;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      sum_scalar[i] = 0;
-    }
-    #endif
-
-    if (lambda_m >= 0)
-    {
-      A = (0.5*dtodx) * (lambda_p - lambda_m);
-      B = (1.0/3.0)*(dtodx)*(dtodx)*(lambda_p*lambda_p - lambda_m*lambda_m);
-
-      chi_1 = A*(del_d_m_i - d_6) + B*d_6;
-      chi_2 = A*(del_vx_m_i - vx_6) + B*vx_6;
-      chi_3 = A*(del_vy_m_i - vy_6) + B*vy_6;
-      chi_4 = A*(del_vz_m_i - vz_6) + B*vz_6;
-      chi_5 = A*(del_p_m_i - p_6) + B*p_6;
-
-      sum_1 += -0.5*(d_i*chi_2/a - chi_5/(a*a));
-      sum_2 += 0.5*(chi_2 - chi_5/(a*d_i));
-      sum_5 += -0.5*(d_i*chi_2*a - chi_5);
-    }
-    if (lambda_0 >= 0)
-    {
-      A = (0.5*dtodx) * (lambda_p - lambda_0);
-      B = (1.0/3.0)*(dtodx)*(dtodx)*(lambda_p*lambda_p - lambda_0*lambda_0);
-
-      chi_1 = A*(del_d_m_i - d_6) + B*d_6;
-      chi_2 = A*(del_vx_m_i - vx_6) + B*vx_6;
-      chi_3 = A*(del_vy_m_i - vy_6) + B*vy_6;
-      chi_4 = A*(del_vz_m_i - vz_6) + B*vz_6;
-      chi_5 = A*(del_p_m_i - p_6) + B*p_6;
-      #ifdef DE
-      chi_ge = A*(del_ge_m_i - ge_6) + B*ge_6;
-      #endif
-      #ifdef SCALAR
-      for (int i=0; i<NSCALARS; i++) {
-        chi_scalar[i] = A*(del_scalar_m_i[i] - scalar_6[i]) + B*scalar_6[i];
-      }
-      #endif
-
-      sum_1 += chi_1 - chi_5/(a*a);
-      sum_3 += chi_3;
-      sum_4 += chi_4;
-      #ifdef DE
-      sum_ge += chi_ge;
-      #endif
-      #ifdef SCALAR
-      for (int i=0; i<NSCALARS; i++) {
-        sum_scalar[i] += chi_scalar[i];
-      }
-      #endif
-    }
-    if (lambda_p >= 0)
-    {
-      A = (0.5*dtodx) * (lambda_p - lambda_p);
-      B = (1.0/3.0)*(dtodx)*(dtodx)*(lambda_p*lambda_p - lambda_p*lambda_p);
-
-      chi_1 = A*(del_d_m_i - d_6) + B*d_6;
-      chi_2 = A*(del_vx_m_i - vx_6) + B*vx_6;
-      chi_3 = A*(del_vy_m_i - vy_6) + B*vy_6;
-      chi_4 = A*(del_vz_m_i - vz_6) + B*vz_6;
-      chi_5 = A*(del_p_m_i - p_6) + B*p_6;
-
-      sum_1 += 0.5*(d_i*chi_2/a + chi_5/(a*a));
-      sum_2 += 0.5*(chi_2 + chi_5/(a*d_i));
-      sum_5 += 0.5*(d_i*chi_2*a + chi_5);
-    }
+  del_m_i.density    = interface_L_iph.density - interface_R_imh.density;
+  del_m_i.velocity_x = interface_L_iph.velocity_x - interface_R_imh.velocity_x;
+  del_m_i.velocity_y = interface_L_iph.velocity_y - interface_R_imh.velocity_y;
+  del_m_i.velocity_z = interface_L_iph.velocity_z - interface_R_imh.velocity_z;
+  del_m_i.pressure   = interface_L_iph.pressure - interface_R_imh.pressure;
+
+  Real const d_6  = 6.0 * (cell_i.density - 0.5 * (interface_R_imh.density + interface_L_iph.density));
+  Real const vx_6 = 6.0 * (cell_i.velocity_x - 0.5 * (interface_R_imh.velocity_x + interface_L_iph.velocity_x));
+  Real const vy_6 = 6.0 * (cell_i.velocity_y - 0.5 * (interface_R_imh.velocity_y + interface_L_iph.velocity_y));
+  Real const vz_6 = 6.0 * (cell_i.velocity_z - 0.5 * (interface_R_imh.velocity_z + interface_L_iph.velocity_z));
+  Real const p_6  = 6.0 * (cell_i.pressure - 0.5 * (interface_R_imh.pressure + interface_L_iph.pressure));
+
+#ifdef DE
+  del_m_i.gas_energy = interface_L_iph.gas_energy - interface_R_imh.gas_energy;
+  Real const ge_6    = 6.0 * (cell_i.gas_energy - 0.5 * (interface_R_imh.gas_energy + interface_L_iph.gas_energy));
+#endif  // DE
+
+#ifdef SCALAR
+  Real scalar_6[NSCALARS];
+  for (int i = 0; i < NSCALARS; i++) {
+    del_m_i.scalar[i] = interface_L_iph.scalar[i] - interface_R_imh.scalar[i];
+    scalar_6[i]       = 6.0 * (cell_i.scalar[i] - 0.5 * (interface_R_imh.scalar[i] + interface_L_iph.scalar[i]));
+  }
+#endif  // SCALAR
+
+  // Compute the eigenvalues of the linearized equations in the
+  // primitive variables using the cell-centered primitive variables
+
+  // recalculate the adiabatic sound speed in cell i
+  sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma);
+
+  Real const lambda_m = cell_i.velocity_x - sound_speed;
+  Real const lambda_0 = cell_i.velocity_x;
+  Real const lambda_p = cell_i.velocity_x + sound_speed;
+
+  // Step 9 - Compute the left and right interface values using monotonized
+  // parabolic interpolation
+  //          Stone Eqns 55 & 56
+
+  // largest eigenvalue
+  Real const lambda_max = fmax(lambda_p, (Real)0);
+  // smallest eigenvalue
+  Real const lambda_min = fmin(lambda_m, (Real)0);
+
+  // left interface value, i+1/2
+  Real const dtodx = dt / dx;
+  interface_L_iph.density =
+      interface_L_iph.density -
+      lambda_max * (0.5 * dtodx) * (del_m_i.density - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * d_6);
+  interface_L_iph.velocity_x =
+      interface_L_iph.velocity_x -
+      lambda_max * (0.5 * dtodx) * (del_m_i.velocity_x - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vx_6);
+  interface_L_iph.velocity_y =
+      interface_L_iph.velocity_y -
+      lambda_max * (0.5 * dtodx) * (del_m_i.velocity_y - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vy_6);
+  interface_L_iph.velocity_z =
+      interface_L_iph.velocity_z -
+      lambda_max * (0.5 * dtodx) * (del_m_i.velocity_z - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vz_6);
+  interface_L_iph.pressure =
+      interface_L_iph.pressure -
+      lambda_max * (0.5 * dtodx) * (del_m_i.pressure - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * p_6);
+
+  // right interface value, i-1/2
+  interface_R_imh.density =
+      interface_R_imh.density -
+      lambda_min * (0.5 * dtodx) * (del_m_i.density + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * d_6);
+  interface_R_imh.velocity_x =
+      interface_R_imh.velocity_x -
+      lambda_min * (0.5 * dtodx) * (del_m_i.velocity_x + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vx_6);
+  interface_R_imh.velocity_y =
+      interface_R_imh.velocity_y -
+      lambda_min * (0.5 * dtodx) * (del_m_i.velocity_y + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vy_6);
+  interface_R_imh.velocity_z =
+      interface_R_imh.velocity_z -
+      lambda_min * (0.5 * dtodx) * (del_m_i.velocity_z + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vz_6);
+  interface_R_imh.pressure =
+      interface_R_imh.pressure -
+      lambda_min * (0.5 * dtodx) * (del_m_i.pressure + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * p_6);
+
+#ifdef DE
+  interface_L_iph.gas_energy =
+      interface_L_iph.gas_energy -
+      lambda_max * (0.5 * dtodx) * (del_m_i.gas_energy - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * ge_6);
+  interface_R_imh.gas_energy =
+      interface_R_imh.gas_energy -
+      lambda_min * (0.5 * dtodx) * (del_m_i.gas_energy + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * ge_6);
+#endif  // DE
+
+#ifdef SCALAR
+  for (int i = 0; i < NSCALARS; i++) {
+    interface_L_iph.scalar[i] =
+        interface_L_iph.scalar[i] -
+        lambda_max * (0.5 * dtodx) * (del_m_i.scalar[i] - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * scalar_6[i]);
+    interface_R_imh.scalar[i] =
+        interface_R_imh.scalar[i] -
+        lambda_min * (0.5 * dtodx) * (del_m_i.scalar[i] + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * scalar_6[i]);
+  }
+#endif  // SCALAR
+
+  // Step 10 - Perform the characteristic tracing
+  //           Stone Eqns 57 - 60
+
+  // left-hand interface value, i+1/2
+  Real sum_1 = 0, sum_2 = 0, sum_3 = 0, sum_4 = 0, sum_5 = 0;
+#ifdef DE
+  Real sum_ge = 0;
+  Real chi_ge = 0;
+#endif  // DE
+#ifdef SCALAR
+  Real chi_scalar[NSCALARS];
+  Real sum_scalar[NSCALARS];
+  for (Real &val : sum_scalar) {
+    val = 0;
+  }
+#endif  // SCALAR
 
-    // add the corrections to the initial guesses for the interface values
-    d_R += sum_1;
-    vx_R += sum_2;
-    vy_R += sum_3;
-    vz_R += sum_4;
-    p_R += sum_5;
-    #ifdef DE
-    ge_R += sum_ge;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_R[i] += sum_scalar[i];
-    }
-    #endif
-
-    // right-hand interface value, i-1/2
-    sum_1 = 0;
-    sum_2 = 0;
-    sum_3 = 0;
-    sum_4 = 0;
-    sum_5 = 0;
-    #ifdef DE
-    sum_ge = 0;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      sum_scalar[i] = 0;
-    }
-    #endif
-    if (lambda_m <= 0)
-    {
-      C = (0.5*dtodx) * (lambda_m - lambda_m);
-      D = (1.0/3.0)*(dtodx)*(dtodx)*(lambda_m*lambda_m - lambda_m*lambda_m);
-
-      chi_1 = C*(del_d_m_i + d_6) + D*d_6;
-      chi_2 = C*(del_vx_m_i + vx_6) + D*vx_6;
-      chi_3 = C*(del_vy_m_i + vy_6) + D*vy_6;
-      chi_4 = C*(del_vz_m_i + vz_6) + D*vz_6;
-      chi_5 = C*(del_p_m_i + p_6) + D*p_6;
-
-      sum_1 += -0.5*(d_i*chi_2/a - chi_5/(a*a));
-      sum_2 += 0.5*(chi_2 - chi_5/(a*d_i));
-      sum_5 += -0.5*(d_i*chi_2*a - chi_5);
-    }
-    if (lambda_0 <= 0)
-    {
-      C = (0.5*dtodx) * (lambda_m - lambda_0);
-      D = (1.0/3.0)*(dtodx)*(dtodx)*(lambda_m*lambda_m - lambda_0*lambda_0);
-
-      chi_1 = C*(del_d_m_i + d_6) + D*d_6;
-      chi_2 = C*(del_vx_m_i + vx_6) + D*vx_6;
-      chi_3 = C*(del_vy_m_i + vy_6) + D*vy_6;
-      chi_4 = C*(del_vz_m_i + vz_6) + D*vz_6;
-      chi_5 = C*(del_p_m_i + p_6) + D*p_6;
-      #ifdef DE
-      chi_ge = C*(del_ge_m_i + ge_6) + D*ge_6;
-      #endif
-      #ifdef SCALAR
-      for (int i=0; i<NSCALARS; i++) {
-        chi_scalar[i] = C*(del_scalar_m_i[i] + scalar_6[i]) + D*scalar_6[i];
-      }
-      #endif
-
-      sum_1 += chi_1 - chi_5/(a*a);
-      sum_3 += chi_3;
-      sum_4 += chi_4;
-      #ifdef DE
-      sum_ge += chi_ge;
-      #endif
-      #ifdef SCALAR
-      for (int i=0; i<NSCALARS; i++) {
-        sum_scalar[i] += chi_scalar[i];
-      }
-      #endif
-    }
-    if (lambda_p <= 0)
-    {
-      C = (0.5*dtodx) * (lambda_m - lambda_p);
-      D = (1.0/3.0)*(dtodx)*(dtodx)*(lambda_m*lambda_m - lambda_p*lambda_p);
-
-      chi_1 = C*(del_d_m_i + d_6) + D*d_6;
-      chi_2 = C*(del_vx_m_i + vx_6) + D*vx_6;
-      chi_3 = C*(del_vy_m_i + vy_6) + D*vy_6;
-      chi_4 = C*(del_vz_m_i + vz_6) + D*vz_6;
-      chi_5 = C*(del_p_m_i + p_6) + D*p_6;
-
-      sum_1 += 0.5*(d_i*chi_2/a + chi_5/(a*a));
-      sum_2 += 0.5*(chi_2 + chi_5/(a*d_i));
-      sum_5 += 0.5*(d_i*chi_2*a + chi_5);
-    }
+  if (lambda_m >= 0) {
+    Real const A = (0.5 * dtodx) * (lambda_p - lambda_m);
+    Real const B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_m * lambda_m);
 
-    // add the corrections
-    d_L += sum_1;
-    vx_L += sum_2;
-    vy_L += sum_3;
-    vz_L += sum_4;
-    p_L += sum_5;
-    #ifdef DE
-    ge_L += sum_ge;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_L[i] += sum_scalar[i];
-    }
-    #endif
-
-    #endif //CTU
-
-    // enforce minimum values
-    d_L = fmax(d_L, (Real) TINY_NUMBER);
-    d_R = fmax(d_R, (Real) TINY_NUMBER);
-    p_L = fmax(p_L, (Real) TINY_NUMBER);
-    p_R = fmax(p_R, (Real) TINY_NUMBER);
-
-    // Step 11 - Send final values back from kernel
-
-    // bounds_R refers to the right side of the i-1/2 interface
-    if (dir == 0) id = xid-1 + yid*nx + zid*nx*ny;
-    if (dir == 1) id = xid + (yid-1)*nx + zid*nx*ny;
-    if (dir == 2) id = xid + yid*nx + (zid-1)*nx*ny;
-    dev_bounds_R[            id] = d_L;
-    dev_bounds_R[o1*n_cells + id] = d_L*vx_L;
-    dev_bounds_R[o2*n_cells + id] = d_L*vy_L;
-    dev_bounds_R[o3*n_cells + id] = d_L*vz_L;
-    dev_bounds_R[4*n_cells + id] = p_L/(gamma-1.0) + 0.5*d_L*(vx_L*vx_L + vy_L*vy_L + vz_L*vz_L);
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_bounds_R[(5+i)*n_cells + id] = d_L*scalar_L[i];
-    }
-    #endif
-    #ifdef DE
-    dev_bounds_R[(n_fields-1)*n_cells + id] = d_L*ge_L;
-    #endif
-    // bounds_L refers to the left side of the i+1/2 interface
-    id = xid + yid*nx + zid*nx*ny;
-    dev_bounds_L[            id] = d_R;
-    dev_bounds_L[o1*n_cells + id] = d_R*vx_R;
-    dev_bounds_L[o2*n_cells + id] = d_R*vy_R;
-    dev_bounds_L[o3*n_cells + id] = d_R*vz_R;
-    dev_bounds_L[4*n_cells + id] = p_R/(gamma-1.0) + 0.5*d_R*(vx_R*vx_R + vy_R*vy_R + vz_R*vz_R);
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_bounds_L[(5+i)*n_cells + id] = d_R*scalar_R[i];
-    }
-    #endif
-    #ifdef DE
-    dev_bounds_L[(n_fields-1)*n_cells + id] = d_R*ge_R;
-    #endif
+    Real const chi_1 = A * (del_m_i.density - d_6) + B * d_6;
+    Real const chi_2 = A * (del_m_i.velocity_x - vx_6) + B * vx_6;
+    Real const chi_3 = A * (del_m_i.velocity_y - vy_6) + B * vy_6;
+    Real const chi_4 = A * (del_m_i.velocity_z - vz_6) + B * vz_6;
+    Real const chi_5 = A * (del_m_i.pressure - p_6) + B * p_6;
+
+    sum_1 += -0.5 * (cell_i.density * chi_2 / sound_speed - chi_5 / (sound_speed * sound_speed));
+    sum_2 += 0.5 * (chi_2 - chi_5 / (sound_speed * cell_i.density));
+    sum_5 += -0.5 * (cell_i.density * chi_2 * sound_speed - chi_5);
+  }
+  if (lambda_0 >= 0) {
+    Real const A = (0.5 * dtodx) * (lambda_p - lambda_0);
+    Real const B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_0 * lambda_0);
+
+    Real const chi_1 = A * (del_m_i.density - d_6) + B * d_6;
+    Real const chi_2 = A * (del_m_i.velocity_x - vx_6) + B * vx_6;
+    Real const chi_3 = A * (del_m_i.velocity_y - vy_6) + B * vy_6;
+    Real const chi_4 = A * (del_m_i.velocity_z - vz_6) + B * vz_6;
+    Real const chi_5 = A * (del_m_i.pressure - p_6) + B * p_6;
+#ifdef DE
+    chi_ge = A * (del_m_i.gas_energy - ge_6) + B * ge_6;
+#endif  // DE
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      chi_scalar[i] = A * (del_m_i.scalar[i] - scalar_6[i]) + B * scalar_6[i];
+    }
+#endif  // SCALAR
+
+    sum_1 += chi_1 - chi_5 / (sound_speed * sound_speed);
+    sum_3 += chi_3;
+    sum_4 += chi_4;
+#ifdef DE
+    sum_ge += chi_ge;
+#endif  // DE
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      sum_scalar[i] += chi_scalar[i];
+    }
+#endif  // SCALAR
+  }
+  if (lambda_p >= 0) {
+    Real const A = (0.5 * dtodx) * (lambda_p - lambda_p);
+    Real const B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_p * lambda_p);
+
+    Real const chi_1 = A * (del_m_i.density - d_6) + B * d_6;
+    Real const chi_2 = A * (del_m_i.velocity_x - vx_6) + B * vx_6;
+    Real const chi_3 = A * (del_m_i.velocity_y - vy_6) + B * vy_6;
+    Real const chi_4 = A * (del_m_i.velocity_z - vz_6) + B * vz_6;
+    Real const chi_5 = A * (del_m_i.pressure - p_6) + B * p_6;
+
+    sum_1 += 0.5 * (cell_i.density * chi_2 / sound_speed + chi_5 / (sound_speed * sound_speed));
+    sum_2 += 0.5 * (chi_2 + chi_5 / (sound_speed * cell_i.density));
+    sum_5 += 0.5 * (cell_i.density * chi_2 * sound_speed + chi_5);
+  }
+
+  // add the corrections to the initial guesses for the interface values
+  interface_L_iph.density += sum_1;
+  interface_L_iph.velocity_x += sum_2;
+  interface_L_iph.velocity_y += sum_3;
+  interface_L_iph.velocity_z += sum_4;
+  interface_L_iph.pressure += sum_5;
+#ifdef DE
+  interface_L_iph.gas_energy += sum_ge;
+#endif  // DE
+#ifdef SCALAR
+  for (int i = 0; i < NSCALARS; i++) {
+    interface_L_iph.scalar[i] += sum_scalar[i];
+  }
+#endif  // SCALAR
+
+  // right-hand interface value, i-1/2
+  sum_1 = 0;
+  sum_2 = 0;
+  sum_3 = 0;
+  sum_4 = 0;
+  sum_5 = 0;
+#ifdef DE
+  sum_ge = 0;
+#endif  // DE
+#ifdef SCALAR
+  for (Real &val : sum_scalar) {
+    val = 0;
+  }
+#endif  // SCALAR
+  if (lambda_m <= 0) {
+    Real const C = (0.5 * dtodx) * (lambda_m - lambda_m);
+    Real const D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_m * lambda_m);
+
+    Real const chi_1 = C * (del_m_i.density + d_6) + D * d_6;
+    Real const chi_2 = C * (del_m_i.velocity_x + vx_6) + D * vx_6;
+    Real const chi_3 = C * (del_m_i.velocity_y + vy_6) + D * vy_6;
+    Real const chi_4 = C * (del_m_i.velocity_z + vz_6) + D * vz_6;
+    Real const chi_5 = C * (del_m_i.pressure + p_6) + D * p_6;
+
+    sum_1 += -0.5 * (cell_i.density * chi_2 / sound_speed - chi_5 / (sound_speed * sound_speed));
+    sum_2 += 0.5 * (chi_2 - chi_5 / (sound_speed * cell_i.density));
+    sum_5 += -0.5 * (cell_i.density * chi_2 * sound_speed - chi_5);
+  }
+  if (lambda_0 <= 0) {
+    Real const C = (0.5 * dtodx) * (lambda_m - lambda_0);
+    Real const D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_0 * lambda_0);
+
+    Real const chi_1 = C * (del_m_i.density + d_6) + D * d_6;
+    Real const chi_2 = C * (del_m_i.velocity_x + vx_6) + D * vx_6;
+    Real const chi_3 = C * (del_m_i.velocity_y + vy_6) + D * vy_6;
+    Real const chi_4 = C * (del_m_i.velocity_z + vz_6) + D * vz_6;
+    Real const chi_5 = C * (del_m_i.pressure + p_6) + D * p_6;
+#ifdef DE
+    chi_ge = C * (del_m_i.gas_energy + ge_6) + D * ge_6;
+#endif  // DE
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      chi_scalar[i] = C * (del_m_i.scalar[i] + scalar_6[i]) + D * scalar_6[i];
+    }
+#endif  // SCALAR
+
+    sum_1 += chi_1 - chi_5 / (sound_speed * sound_speed);
+    sum_3 += chi_3;
+    sum_4 += chi_4;
+#ifdef DE
+    sum_ge += chi_ge;
+#endif  // DE
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      sum_scalar[i] += chi_scalar[i];
+    }
+#endif  // SCALAR
+  }
+  if (lambda_p <= 0) {
+    Real const C = (0.5 * dtodx) * (lambda_m - lambda_p);
+    Real const D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_p * lambda_p);
+
+    Real const chi_1 = C * (del_m_i.density + d_6) + D * d_6;
+    Real const chi_2 = C * (del_m_i.velocity_x + vx_6) + D * vx_6;
+    Real const chi_3 = C * (del_m_i.velocity_y + vy_6) + D * vy_6;
+    Real const chi_4 = C * (del_m_i.velocity_z + vz_6) + D * vz_6;
+    Real const chi_5 = C * (del_m_i.pressure + p_6) + D * p_6;
+
+    sum_1 += 0.5 * (cell_i.density * chi_2 / sound_speed + chi_5 / (sound_speed * sound_speed));
+    sum_2 += 0.5 * (chi_2 + chi_5 / (sound_speed * cell_i.density));
+    sum_5 += 0.5 * (cell_i.density * chi_2 * sound_speed + chi_5);
+  }
 
+  // add the corrections
+  interface_R_imh.density += sum_1;
+  interface_R_imh.velocity_x += sum_2;
+  interface_R_imh.velocity_y += sum_3;
+  interface_R_imh.velocity_z += sum_4;
+  interface_R_imh.pressure += sum_5;
+#ifdef DE
+  interface_R_imh.gas_energy += sum_ge;
+#endif  // DE
+#ifdef SCALAR
+  for (int i = 0; i < NSCALARS; i++) {
+    interface_R_imh.scalar[i] += sum_scalar[i];
   }
+#endif  // SCALAR
+
+  // This is the end of the characteristic tracing
+
+  // enforce minimum values
+  interface_R_imh.density  = fmax(interface_R_imh.density, (Real)TINY_NUMBER);
+  interface_L_iph.density  = fmax(interface_L_iph.density, (Real)TINY_NUMBER);
+  interface_R_imh.pressure = fmax(interface_R_imh.pressure, (Real)TINY_NUMBER);
+  interface_L_iph.pressure = fmax(interface_L_iph.pressure, (Real)TINY_NUMBER);
+
+  // Step 11 - Send final values back from kernel
+
+  // Convert the left and right states in the primitive to the conserved variables send final values back from kernel
+  // bounds_R refers to the right side of the i-1/2 interface
+  size_t id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny);
+  reconstruction::Write_Data(interface_L_iph, dev_bounds_L, dev_conserved, id, n_cells, o1, o2, o3, gamma);
+
+  id = cuda_utilities::compute1DIndex(xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny);
+  reconstruction::Write_Data(interface_R_imh, dev_bounds_R, dev_conserved, id, n_cells, o1, o2, o3, gamma);
 }
+// =====================================================================================================================
 
+// =====================================================================================================================
+__global__ __launch_bounds__(TPB) void PPMC_VL(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx,
+                                               int ny, int nz, Real gamma, int dir)
+{
+  // get a thread ID
+  int const thread_id = threadIdx.x + blockIdx.x * blockDim.x;
+  int xid, yid, zid;
+  cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid);
 
-#endif //PPMC
-#endif //CUDA
+  // Ensure that we are only operating on cells that will be used
+  if (reconstruction::Thread_Guard<3>(nx, ny, nz, xid, yid, zid)) {
+    return;
+  }
+
+  // Compute the total number of cells
+  int const n_cells = nx * ny * nz;
+
+  // Set the field indices for the various directions
+  int o1, o2, o3;
+  switch (dir) {
+    case 0:
+      o1 = grid_enum::momentum_x;
+      o2 = grid_enum::momentum_y;
+      o3 = grid_enum::momentum_z;
+      break;
+    case 1:
+      o1 = grid_enum::momentum_y;
+      o2 = grid_enum::momentum_z;
+      o3 = grid_enum::momentum_x;
+      break;
+    case 2:
+      o1 = grid_enum::momentum_z;
+      o2 = grid_enum::momentum_x;
+      o3 = grid_enum::momentum_y;
+      break;
+  }
 
+  // load the 5-cell stencil into registers
+  // cell i
+  reconstruction::Primitive const cell_i =
+      reconstruction::Load_Data(dev_conserved, xid, yid, zid, nx, ny, n_cells, o1, o2, o3, gamma);
+
+  // cell i-1. The equality checks the direction and will subtract one from the correct direction
+  // im1 stands for "i minus 1"
+  reconstruction::Primitive const cell_im1 = reconstruction::Load_Data(
+      dev_conserved, xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma);
+
+  // cell i+1.  The equality checks the direction and add one to the correct direction
+  // ip1 stands for "i plus 1"
+  reconstruction::Primitive const cell_ip1 = reconstruction::Load_Data(
+      dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma);
+
+  // cell i-2. The equality checks the direction and will subtract two from the correct direction
+  // im2 stands for "i minus 2"
+  reconstruction::Primitive const cell_im2 =
+      reconstruction::Load_Data(dev_conserved, xid - 2 * int(dir == 0), yid - 2 * int(dir == 1),
+                                zid - 2 * int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma);
+
+  // cell i+2.  The equality checks the direction and add two to the correct direction
+  // ip2 stands for "i plus 2"
+  reconstruction::Primitive const cell_ip2 =
+      reconstruction::Load_Data(dev_conserved, xid + 2 * int(dir == 0), yid + 2 * int(dir == 1),
+                                zid + 2 * int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma);
+
+  // Convert to the characteristic variables
+  Real const sound_speed         = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma);
+  Real const sound_speed_squared = sound_speed * sound_speed;
+
+#ifdef MHD
+  reconstruction::EigenVecs eigenvectors =
+      reconstruction::Compute_Eigenvectors(cell_i, sound_speed, sound_speed_squared, gamma);
+#else
+  reconstruction::EigenVecs eigenvectors;
+#endif  // MHD
+
+  // Cell i
+  reconstruction::Characteristic const cell_i_characteristic = reconstruction::Primitive_To_Characteristic(
+      cell_i, cell_i, eigenvectors, sound_speed, sound_speed_squared, gamma);
+
+  // Cell i-1
+  reconstruction::Characteristic const cell_im1_characteristic = reconstruction::Primitive_To_Characteristic(
+      cell_i, cell_im1, eigenvectors, sound_speed, sound_speed_squared, gamma);
+
+  // Cell i-2
+  reconstruction::Characteristic const cell_im2_characteristic = reconstruction::Primitive_To_Characteristic(
+      cell_i, cell_im2, eigenvectors, sound_speed, sound_speed_squared, gamma);
+
+  // Cell i+1
+  reconstruction::Characteristic const cell_ip1_characteristic = reconstruction::Primitive_To_Characteristic(
+      cell_i, cell_ip1, eigenvectors, sound_speed, sound_speed_squared, gamma);
+
+  // Cell i+2
+  reconstruction::Characteristic const cell_ip2_characteristic = reconstruction::Primitive_To_Characteristic(
+      cell_i, cell_ip2, eigenvectors, sound_speed, sound_speed_squared, gamma);
+
+  // Compute the interface states for each field
+  reconstruction::Characteristic interface_R_imh_characteristic, interface_L_iph_characteristic;
+
+  reconstruction::PPM_Single_Variable(cell_im2_characteristic.a0, cell_im1_characteristic.a0, cell_i_characteristic.a0,
+                                      cell_ip1_characteristic.a0, cell_ip2_characteristic.a0,
+                                      interface_L_iph_characteristic.a0, interface_R_imh_characteristic.a0);
+  reconstruction::PPM_Single_Variable(cell_im2_characteristic.a1, cell_im1_characteristic.a1, cell_i_characteristic.a1,
+                                      cell_ip1_characteristic.a1, cell_ip2_characteristic.a1,
+                                      interface_L_iph_characteristic.a1, interface_R_imh_characteristic.a1);
+  reconstruction::PPM_Single_Variable(cell_im2_characteristic.a2, cell_im1_characteristic.a2, cell_i_characteristic.a2,
+                                      cell_ip1_characteristic.a2, cell_ip2_characteristic.a2,
+                                      interface_L_iph_characteristic.a2, interface_R_imh_characteristic.a2);
+  reconstruction::PPM_Single_Variable(cell_im2_characteristic.a3, cell_im1_characteristic.a3, cell_i_characteristic.a3,
+                                      cell_ip1_characteristic.a3, cell_ip2_characteristic.a3,
+                                      interface_L_iph_characteristic.a3, interface_R_imh_characteristic.a3);
+  reconstruction::PPM_Single_Variable(cell_im2_characteristic.a4, cell_im1_characteristic.a4, cell_i_characteristic.a4,
+                                      cell_ip1_characteristic.a4, cell_ip2_characteristic.a4,
+                                      interface_L_iph_characteristic.a4, interface_R_imh_characteristic.a4);
+
+#ifdef MHD
+  reconstruction::PPM_Single_Variable(cell_im2_characteristic.a5, cell_im1_characteristic.a5, cell_i_characteristic.a5,
+                                      cell_ip1_characteristic.a5, cell_ip2_characteristic.a5,
+                                      interface_L_iph_characteristic.a5, interface_R_imh_characteristic.a5);
+  reconstruction::PPM_Single_Variable(cell_im2_characteristic.a6, cell_im1_characteristic.a6, cell_i_characteristic.a6,
+                                      cell_ip1_characteristic.a6, cell_ip2_characteristic.a6,
+                                      interface_L_iph_characteristic.a6, interface_R_imh_characteristic.a6);
+#endif  // MHD
+
+  // Convert back to primitive variables
+  reconstruction::Primitive interface_L_iph = reconstruction::Characteristic_To_Primitive(
+      cell_i, interface_L_iph_characteristic, eigenvectors, sound_speed, sound_speed_squared, gamma);
+  reconstruction::Primitive interface_R_imh = reconstruction::Characteristic_To_Primitive(
+      cell_i, interface_R_imh_characteristic, eigenvectors, sound_speed, sound_speed_squared, gamma);
+
+  // Compute the interfaces for the variables that don't have characteristics
+#ifdef DE
+  reconstruction::PPM_Single_Variable(cell_im2.gas_energy, cell_im1.gas_energy, cell_i.gas_energy, cell_ip1.gas_energy,
+                                      cell_ip2.gas_energy, interface_L_iph.gas_energy, interface_R_imh.gas_energy);
+#endif  // DE
+#ifdef SCALAR
+  for (int i = 0; i < NSCALARS; i++) {
+    reconstruction::PPM_Single_Variable(cell_im2.scalar[i], cell_im1.scalar[i], cell_i.scalar[i], cell_ip1.scalar[i],
+                                        cell_ip2.scalar[i], interface_L_iph.scalar[i], interface_R_imh.scalar[i]);
+  }
+#endif  // SCALAR
+
+  // enforce minimum values
+  interface_R_imh.density  = fmax(interface_R_imh.density, (Real)TINY_NUMBER);
+  interface_L_iph.density  = fmax(interface_L_iph.density, (Real)TINY_NUMBER);
+  interface_R_imh.pressure = fmax(interface_R_imh.pressure, (Real)TINY_NUMBER);
+  interface_L_iph.pressure = fmax(interface_L_iph.pressure, (Real)TINY_NUMBER);
+
+  // Step 11 - Send final values back from kernel
+
+  // Convert the left and right states in the primitive to the conserved variables send final values back from kernel
+  // bounds_R refers to the right side of the i-1/2 interface
+  size_t id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny);
+  reconstruction::Write_Data(interface_L_iph, dev_bounds_L, dev_conserved, id, n_cells, o1, o2, o3, gamma);
+
+  id = cuda_utilities::compute1DIndex(xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny);
+  reconstruction::Write_Data(interface_R_imh, dev_bounds_R, dev_conserved, id, n_cells, o1, o2, o3, gamma);
+}
+// =====================================================================================================================
diff --git a/src/reconstruction/ppmc_cuda.h b/src/reconstruction/ppmc_cuda.h
index 6c7cfd9fc..916853874 100644
--- a/src/reconstruction/ppmc_cuda.h
+++ b/src/reconstruction/ppmc_cuda.h
@@ -1,18 +1,53 @@
 /*! \file ppmc_cuda.h
- *  \brief Declarations of the cuda ppm kernels, characteristic reconstruction version. */
-#ifdef CUDA
-#ifdef PPMC
+ *  \brief Declarations of the cuda ppm kernels, characteristic reconstruction
+ * version. */
 
 #ifndef PPMC_CUDA_H
 #define PPMC_CUDA_H
 
 #include "../global/global.h"
 
-/*! \fn void PPMC(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir)
- *  \brief When passed a stencil of conserved variables, returns the left and right
-           boundary values for the interface calculated using ppm. */
-__global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields);
+/*!
+ * \brief Computes the left and right interface states using PPM with limiting in the characteristic variables and
+ * characteristic tracing. Used for the CTU and SIMPLE integrators. This uses the PPM method described in
+ * Stone et al. 2008 "Athena: A New Code for Astrophysical MHD". Fundementally this method relies on a Van Leer limiter
+ * in the characteristic variables to monotonize the slopes followed by limiting the interface states using the limiter
+ * from Colella & Woodward 1984.
+ *
+ * \param[in] dev_conserved The conserved variable array
+ * \param[out] dev_bounds_L The array of left interfaces
+ * \param[out] dev_bounds_R The array of right interfaces
+ * \param[in] nx The number of cells in the X-direction
+ * \param[in] ny The number of cells in the Y-direction
+ * \param[in] nz The number of cells in the Z-direction
+ * \param[in] dx The length of the cells in the `dir` direction
+ * \param[in] dt The time step
+ * \param[in] gamma The adiabatic index
+ * \param[in] dir The direction to reconstruct. 0=X, 1=Y, 2=Z
+ */
+__global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx,
+                         Real dt, Real gamma, int dir);
 
-#endif // PPMC_CUDA_H
-#endif // PPMC
-#endif // CUDA
+/*!
+ * \brief Computes the left and right interface states using PPM with limiting in the characteristic variables. Used for
+ * the VL (Van Leer) integrators. This uses the PPM method described in
+ * Felker & Stone 2018 "A fourth-order accurate finite volume method for ideal MHD via upwind constrained transport".
+ * This method computes the 3rd order interface then applies a mixture of monoticity constraints from from Colella &
+ * Sekora 2008, McCorquodale & Colella 2011, and Colella et al. 2011; for details see the
+ * `reconstruction::PPM_Single_Variable` function. We found that this newer method and limiters was more stable, less
+ * oscillatory, and faster than the method described in Stone et al. 2008 which is used in PPMC_CTU. The difference is
+ * most pronounced in the Brio & Wu shock tube where the PPM oscillations are much smaller using this method.
+ *
+ * \param[in] dev_conserved The conserved variable array
+ * \param[out] dev_bounds_L The array of left interfaces
+ * \param[out] dev_bounds_R The array of right interfaces
+ * \param[in] nx The number of cells in the X-direction
+ * \param[in] ny The number of cells in the Y-direction
+ * \param[in] nz The number of cells in the Z-direction
+ * \param[in] gamma The adiabatic index
+ * \param[in] dir The direction to reconstruct. 0=X, 1=Y, 2=Z
+ */
+__global__ __launch_bounds__(TPB) void PPMC_VL(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx,
+                                               int ny, int nz, Real gamma, int dir);
+
+#endif  // PPMC_CUDA_H
diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu
new file mode 100644
index 000000000..9e9b11140
--- /dev/null
+++ b/src/reconstruction/ppmc_cuda_tests.cu
@@ -0,0 +1,259 @@
+/*!
+ * \file ppmc_cuda_tests.cu
+ * \brief Tests for the contents of ppmc_cuda.h and ppmc_cuda.cu
+ *
+ */
+
+// STL Includes
+#include <algorithm>
+#include <random>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+// External Includes
+#include <gtest/gtest.h>  // Include GoogleTest and related libraries/headers
+
+// Local Includes
+#include "../global/global.h"
+#include "../io/io.h"
+#include "../reconstruction/ppmc_cuda.h"
+#include "../utils/DeviceVector.h"
+#include "../utils/hydro_utilities.h"
+#include "../utils/testing_utilities.h"
+
+TEST(tHYDROPpmcCTUReconstructor, CorrectInputExpectCorrectOutput)
+{
+  // Set up PRNG to use
+  std::mt19937_64 prng(42);
+  std::uniform_real_distribution<double> doubleRand(0.1, 5);
+
+  // Mock up needed information
+  size_t const nx       = 6;
+  size_t const ny       = 6;
+  size_t const nz       = 6;
+  size_t const n_fields = 5;
+  double const dx       = doubleRand(prng);
+  double const dt       = doubleRand(prng);
+  double const gamma    = 5.0 / 3.0;
+
+  // Setup host grid. Fill host grid with random values and randomly assign maximum value
+  std::vector<double> host_grid(nx * ny * nz * n_fields);
+  for (double &val : host_grid) {
+    val = doubleRand(prng);
+  }
+
+  // Allocating and copying to device
+  cuda_utilities::DeviceVector<double> dev_grid(host_grid.size());
+  dev_grid.cpyHostToDevice(host_grid);
+
+  // Fiducial Data
+  std::vector<std::unordered_map<int, double>> fiducial_interface_left = {{{86, 2.6558981128823214},
+                                                                           {302, 0.84399195916314151},
+                                                                           {518, 2.2002498722761787},
+                                                                           {734, 1.764334292986655},
+                                                                           {950, 3.3600925565746804}},
+                                                                          {{86, 2.4950488327292639},
+                                                                           {302, 0.79287723513518138},
+                                                                           {518, 1.7614576990062414},
+                                                                           {734, 1.8238574169157304},
+                                                                           {950, 3.14294317122161}},
+                                                                          {{86, 2.6558981128823214},
+                                                                           {302, 0.84399195916314151},
+                                                                           {518, 2.0109603398129137},
+                                                                           {734, 1.764334292986655},
+                                                                           {950, 3.2100231679403066}}};
+
+  std::vector<std::unordered_map<int, double>> fiducial_interface_right = {{{85, 2.6558981128823214},
+                                                                            {301, 0.84399195916314151},
+                                                                            {517, 1.8381070277226794},
+                                                                            {733, 1.764334292986655},
+                                                                            {949, 3.0847691079841209}},
+                                                                           {{80, 3.1281603739188069},
+                                                                            {296, 0.99406757727427164},
+                                                                            {512, 1.8732124042412865},
+                                                                            {728, 1.6489758692176784},
+                                                                            {944, 2.8820015278590443}},
+                                                                           {{50, 2.6558981128823214},
+                                                                            {266, 0.84399195916314151},
+                                                                            {482, 2.0109603398129137},
+                                                                            {698, 1.764334292986655},
+                                                                            {914, 3.2100231679403066}}};
+
+  // Loop over different directions
+  for (size_t direction = 0; direction < 3; direction++) {
+    // Allocate device buffers
+    cuda_utilities::DeviceVector<double> dev_interface_left(host_grid.size(), true);
+    cuda_utilities::DeviceVector<double> dev_interface_right(host_grid.size(), true);
+
+    // Launch kernel
+    hipLaunchKernelGGL(PPMC_CTU, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(),
+                       dev_interface_right.data(), nx, ny, nz, dx, dt, gamma, direction);
+    GPU_Error_Check();
+    GPU_Error_Check(cudaDeviceSynchronize());
+
+    // Perform Comparison
+    for (size_t i = 0; i < host_grid.size(); i++) {
+      // Check the left interface
+      double test_val = dev_interface_left.at(i);
+      double fiducial_val =
+          (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end())
+              ? 0.0
+              : fiducial_interface_left.at(direction)[i];
+
+      testing_utilities::Check_Results(
+          fiducial_val, test_val,
+          "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction));
+
+      // Check the right interface
+      test_val     = dev_interface_right.at(i);
+      fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end())
+                         ? 0.0
+                         : fiducial_interface_right.at(direction)[i];
+
+      testing_utilities::Check_Results(
+          fiducial_val, test_val,
+          "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction));
+    }
+  }
+}
+
+TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput)
+{
+#ifdef DE
+  /// This test doesn't support Dual Energy. It wouldn't be that hard to add support for DE but the DE parts of the
+  /// reconstructor (loading and PPM_Single_Variable) are well tested elsewhere so there's no need to add the extra
+  /// complexity here.
+  GTEST_SKIP();
+#endif  // DE
+
+  // Set up PRNG to use
+  std::mt19937_64 prng(42);
+  std::uniform_real_distribution<double> doubleRand(0.1, 5);
+
+  // Mock up needed information
+  size_t const nx    = 6;
+  size_t const ny    = 6;
+  size_t const nz    = 6;
+  double const gamma = 5.0 / 3.0;
+#ifdef MHD
+  size_t const n_fields = 8;
+#else   // not MHD
+  size_t const n_fields = 5;
+#endif  // MHD
+
+  // Setup host grid. Fill host grid with random values and randomly assign maximum value
+  std::vector<double> host_grid(nx * ny * nz * n_fields);
+  for (double &val : host_grid) {
+    val = doubleRand(prng);
+  }
+
+  // Allocating and copying to device
+  cuda_utilities::DeviceVector<double> dev_grid(host_grid.size());
+  dev_grid.cpyHostToDevice(host_grid);
+
+// Fiducial Data
+#ifdef MHD
+  std::vector<std::unordered_map<int, double>> fiducial_interface_left = {{{86, 3.6926886385390683},
+                                                                           {302, 2.3022467009220993},
+                                                                           {518, 2.3207781368125389},
+                                                                           {734, 2.6544338753333747},
+                                                                           {950, 11.430630157120799},
+                                                                           {1166, 0.6428577630032507},
+                                                                           {1382, 4.1406925096276597}},
+                                                                          {{86, 3.811691682348938},
+                                                                           {302, 1.4827993897794758},
+                                                                           {518, 2.3955690789476871},
+                                                                           {734, 4.06241130448349},
+                                                                           {950, 10.552876853630949},
+                                                                           {1166, 3.5147238706385471},
+                                                                           {1382, 1.2344879085821312}},
+                                                                          {{86, 3.1608655959160155},
+                                                                           {302, 1.5377824007725194},
+                                                                           {518, 0.41798730655927896},
+                                                                           {734, 2.2721408530383784},
+                                                                           {950, 5.6329522765789646},
+                                                                           {1166, 0.84450832590555991},
+                                                                           {1382, 1.4279317910797107}}};
+
+  std::vector<std::unordered_map<int, double>> fiducial_interface_right = {{{85, 2.8949509658187838},
+                                                                            {301, 0.25766140043685887},
+                                                                            {517, 1.8194165731976308},
+                                                                            {733, 2.0809921071868756},
+                                                                            {949, 8.1315538869542046},
+                                                                            {1165, 0.49708185787322312},
+                                                                            {1381, 3.2017395511439881}},
+                                                                           {{80, 2.8600082827930269},
+                                                                            {296, 0.37343415089084014},
+                                                                            {512, 1.7974558224423689},
+                                                                            {728, 0.94369445956099784},
+                                                                            {944, 7.7011501503138504},
+                                                                            {1160, 3.5147238706385471},
+                                                                            {1376, 1.2344879085821312}},
+                                                                           {{50, 3.1608655959160155},
+                                                                            {266, 0.32035830490636008},
+                                                                            {482, 3.1721881746709815},
+                                                                            {698, 2.2721408530383784},
+                                                                            {914, 14.017699282483312},
+                                                                            {1130, 1.5292690020097823},
+                                                                            {1346, -0.12121484974901264}}};
+#else   // not MHD
+  std::vector<std::unordered_map<int, double>> fiducial_interface_left = {
+      {{86, 4.155160222900312}, {302, 1.1624633361407897}, {518, 1.6379195998743412}, {734, 2.9868746414179093}},
+      {{86, 4.1795874335665655}, {302, 2.1094239978455054}, {518, 2.6811988240843849}, {734, 4.2540957888954054}},
+      {{86, 2.1772852940944429}, {302, 0.58167501916840214}, {518, 1.3683785996473696}, {734, 0.40276763592716164}}};
+
+  std::vector<std::unordered_map<int, double>> fiducial_interface_right = {{{54, 3.8655260187947502},
+                                                                            {85, 2.6637168309565289},
+                                                                            {301, 0.69483650107094164},
+                                                                            {517, 2.7558388224532218},
+                                                                            {733, 1.9147729154830744}},
+                                                                           {{54, 5.7556871317935459},
+                                                                            {80, 2.6515032256234021},
+                                                                            {296, 0.39344537106429511},
+                                                                            {512, 1.6491544916805785},
+                                                                            {728, 0.85830485311660487}},
+                                                                           {{50, 2.8254070932730269},
+                                                                            {54, 2.1884721760267873},
+                                                                            {266, 0.75482470285166003},
+                                                                            {482, 1.7757096932649317},
+                                                                            {698, 3.6101832818706452}}};
+#endif  // MHD
+
+  // Loop over different directions
+  for (size_t direction = 0; direction < 3; direction++) {
+    // Allocate device buffers
+    cuda_utilities::DeviceVector<double> dev_interface_left(nx * ny * nz * (n_fields - 1), true);
+    cuda_utilities::DeviceVector<double> dev_interface_right(nx * ny * nz * (n_fields - 1), true);
+
+    // Launch kernel
+    hipLaunchKernelGGL(PPMC_VL, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(),
+                       dev_interface_right.data(), nx, ny, nz, gamma, direction);
+    GPU_Error_Check();
+    GPU_Error_Check(cudaDeviceSynchronize());
+
+    // Perform Comparison
+    for (size_t i = 0; i < dev_interface_left.size(); i++) {
+      // Check the left interface
+      double test_val = dev_interface_left.at(i);
+      double fiducial_val =
+          (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end())
+              ? 0.0
+              : fiducial_interface_left.at(direction)[i];
+
+      testing_utilities::Check_Results(
+          fiducial_val, test_val,
+          "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction));
+
+      // Check the right interface
+      test_val     = dev_interface_right.at(i);
+      fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end())
+                         ? 0.0
+                         : fiducial_interface_right.at(direction)[i];
+
+      testing_utilities::Check_Results(
+          fiducial_val, test_val,
+          "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction));
+    }
+  }
+}
diff --git a/src/reconstruction/ppmp_cuda.cu b/src/reconstruction/ppmp_cuda.cu
index ccd1f5a87..2038f215a 100644
--- a/src/reconstruction/ppmp_cuda.cu
+++ b/src/reconstruction/ppmp_cuda.cu
@@ -1,38 +1,49 @@
 /*! \file ppmp_cuda.cu
- *  \brief Definitions of the piecewise parabolic reconstruction (Fryxell 2000) functions
-           with limiting in the primitive variables. */
-#ifdef CUDA
+ *  \brief Definitions of the piecewise parabolic reconstruction (Fryxell 2000)
+ functions with limiting in the primitive variables. */
+
 #ifdef PPMP
 
-#include "../utils/gpu.hpp"
-#include <math.h>
-#include "../global/global.h"
-#include "../global/global_cuda.h"
-#include "../reconstruction/ppmp_cuda.h"
+  #include <math.h>
+
+  #include "../global/global.h"
+  #include "../global/global_cuda.h"
+  #include "../reconstruction/ppmp_cuda.h"
+  #include "../utils/gpu.hpp"
 
-#ifdef DE //PRESSURE_DE
-#include "../utils/hydro_utilities.h"
-#endif
+  #ifdef DE  // PRESSURE_DE
+    #include "../utils/hydro_utilities.h"
+  #endif
 
 // #define STEEPENING
 // #define FLATTENING
-//Note: Errors when using FLATTENING, need to check the ghost cells
-
-/*! \fn __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields)
- *  \brief When passed a stencil of conserved variables, returns the left and right
-           boundary values for the interface calculated using ppm with limiting in the primitive variables. */
-__global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields)
+// Note: Errors when using FLATTENING, need to check the ghost cells
+
+/*! \fn __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real
+ *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int
+ n_fields)
+ *  \brief When passed a stencil of conserved variables, returns the left and
+ right boundary values for the interface calculated using ppm with limiting in
+ the primitive variables. */
+__global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz,
+                          int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields)
 {
-  int n_cells = nx*ny*nz;
+  int n_cells = nx * ny * nz;
   int o1, o2, o3;
   if (dir == 0) {
-    o1 = 1; o2 = 2; o3 = 3;
+    o1 = 1;
+    o2 = 2;
+    o3 = 3;
   }
   if (dir == 1) {
-    o1 = 2; o2 = 3; o3 = 1;
+    o1 = 2;
+    o2 = 3;
+    o3 = 1;
   }
   if (dir == 2) {
-    o1 = 3; o2 = 1; o3 = 2;
+    o1 = 3;
+    o2 = 1;
+    o3 = 2;
   }
 
   // declare primitive variables in the stencil
@@ -43,7 +54,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou
   Real d_ipt, vx_ipt, vy_ipt, vz_ipt, p_ipt;
   #ifdef FLATTENING
   Real p_imth, p_ipth;
-  #endif
+  #endif  // FLATTENING
 
   // declare left and right interface values
   Real d_L, vx_L, vy_L, vz_L, p_L;
@@ -53,13 +64,13 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou
   Real del_q_imo, del_q_i, del_q_ipo;
 
   #ifndef VL
-//  #ifdef CTU
-  Real cs, cl, cr; // sound speed in cell i, and at left and right boundaries
-  Real del_d, del_vx, del_vy, del_vz, del_p; // "slope" accross cell i
+  //  #ifdef CTU
+  Real cs, cl, cr;                            // sound speed in cell i, and at left and right boundaries
+  Real del_d, del_vx, del_vy, del_vz, del_p;  // "slope" accross cell i
   Real d_6, vx_6, vy_6, vz_6, p_6;
   Real beta_m, beta_0, beta_p;
   Real alpha_m, alpha_0, alpha_p;
-  Real lambda_m, lambda_0, lambda_p; // speed of characteristics
+  Real lambda_m, lambda_0, lambda_p;  // speed of characteristics
   Real dL_m, vxL_m, pL_m;
   Real dL_0, vyL_0, vzL_0, pL_0;
   Real vxL_p, pL_p;
@@ -68,34 +79,32 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou
   Real dR_p, vxR_p, pR_p;
   Real chi_L_m, chi_L_0, chi_L_p;
   Real chi_R_m, chi_R_0, chi_R_p;
-  #endif
+  #endif  // CTU
 
   #ifdef DE
   Real ge_i, ge_imo, ge_ipo, ge_imt, ge_ipt, ge_L, ge_R, E_kin, E, dge;
-  #ifndef VL
-//  #ifdef CTU
+    #ifndef VL
+  //  #ifdef CTU
   Real del_ge, ge_6, geL_0, geR_0;
-  #endif
-  #endif
+    #endif  // CTU
+  #endif    // DE
 
   #ifdef SCALAR
   Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS], scalar_imt[NSCALARS], scalar_ipt[NSCALARS];
   Real scalar_L[NSCALARS], scalar_R[NSCALARS];
-  #ifndef VL
-//  #ifdef CTU
+    #ifndef VL
+  //  #ifdef CTU
   Real del_scalar[NSCALARS], scalar_6[NSCALARS], scalarL_0[NSCALARS], scalarR_0[NSCALARS];
-  #endif
-  #endif
-
-
+    #endif  // CTU
+  #endif    // SCALAR
 
   // get a thread ID
-  int blockId = blockIdx.x + blockIdx.y*gridDim.x;
-  int tid = threadIdx.x + blockId*blockDim.x;
+  int blockId = blockIdx.x + blockIdx.y * gridDim.x;
+  int tid     = threadIdx.x + blockId * blockDim.x;
   int id;
-  int zid = tid / (nx*ny);
-  int yid = (tid - zid*nx*ny) / nx;
-  int xid = tid - zid*nx*ny - yid*nx;
+  int zid = tid / (nx * ny);
+  int yid = (tid - zid * nx * ny) / nx;
+  int xid = tid - zid * nx * ny - yid * nx;
 
   int xs, xe, ys, ye, zs, ze;
 
@@ -116,403 +125,442 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou
   //   zs = 3; ze = nz-4;
   // }
 
-  //Ignore only the 2 ghost cells on each side ( instead of ignoring 3 ghost cells on each side )
+  // Ignore only the 2 ghost cells on each side ( instead of ignoring 3 ghost
+  // cells on each side )
   if (dir == 0) {
-    xs = 2; xe = nx-3;
-    ys = 0; ye = ny;
-    zs = 0; ze = nz;
+    xs = 2;
+    xe = nx - 3;
+    ys = 0;
+    ye = ny;
+    zs = 0;
+    ze = nz;
   }
   if (dir == 1) {
-    xs = 0; xe = nx;
-    ys = 2; ye = ny-3;
-    zs = 0; ze = nz;
+    xs = 0;
+    xe = nx;
+    ys = 2;
+    ye = ny - 3;
+    zs = 0;
+    ze = nz;
   }
   if (dir == 2) {
-    xs = 0; xe = nx;
-    ys = 0; ye = ny;
-    zs = 2; ze = nz-3;
+    xs = 0;
+    xe = nx;
+    ys = 0;
+    ye = ny;
+    zs = 2;
+    ze = nz - 3;
   }
 
-  if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze)
-  {
+  if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze) {
     // load the 5-cell stencil into registers
     // cell i
-    id = xid + yid*nx + zid*nx*ny;
-    d_i  =  dev_conserved[            id];
-    vx_i =  dev_conserved[o1*n_cells + id] / d_i;
-    vy_i =  dev_conserved[o2*n_cells + id] / d_i;
-    vz_i =  dev_conserved[o3*n_cells + id] / d_i;
-    #ifdef DE //PRESSURE_DE
-    E = dev_conserved[4*n_cells + id];
-    E_kin = 0.5 * d_i * ( vx_i*vx_i + vy_i*vy_i + vz_i*vz_i );
-    dge = dev_conserved[(n_fields-1)*n_cells + id];
-    p_i = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma );
-    #else
-    p_i  = (dev_conserved[4*n_cells + id] - 0.5*d_i*(vx_i*vx_i + vy_i*vy_i + vz_i*vz_i)) * (gamma - 1.0);
-    #endif //PRESSURE_DE
-    p_i  = fmax(p_i, (Real) TINY_NUMBER);
-    #ifdef DE
+    id   = xid + yid * nx + zid * nx * ny;
+    d_i  = dev_conserved[id];
+    vx_i = dev_conserved[o1 * n_cells + id] / d_i;
+    vy_i = dev_conserved[o2 * n_cells + id] / d_i;
+    vz_i = dev_conserved[o3 * n_cells + id] / d_i;
+  #ifdef DE  // PRESSURE_DE
+    E     = dev_conserved[4 * n_cells + id];
+    E_kin = 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i);
+    dge   = dev_conserved[(n_fields - 1) * n_cells + id];
+    p_i   = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma);
+  #else
+    p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0);
+  #endif  // PRESSURE_DE
+    p_i = fmax(p_i, (Real)TINY_NUMBER);
+  #ifdef DE
     ge_i = dge / d_i;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_i[i] =  dev_conserved[(5+i)*n_cells + id] / d_i;
+  #endif  // DE
+  #ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      scalar_i[i] = dev_conserved[(5 + i) * n_cells + id] / d_i;
     }
-    #endif
+  #endif  // SCALAR
     // cell i-1
-    if (dir == 0) id = xid-1 + yid*nx + zid*nx*ny;
-    if (dir == 1) id = xid + (yid-1)*nx + zid*nx*ny;
-    if (dir == 2) id = xid + yid*nx + (zid-1)*nx*ny;
-    d_imo  =  dev_conserved[            id];
-    vx_imo =  dev_conserved[o1*n_cells + id] / d_imo;
-    vy_imo =  dev_conserved[o2*n_cells + id] / d_imo;
-    vz_imo =  dev_conserved[o3*n_cells + id] / d_imo;
-    #ifdef DE //PRESSURE_DE
-    E = dev_conserved[4*n_cells + id];
-    E_kin = 0.5 * d_imo * ( vx_imo*vx_imo + vy_imo*vy_imo + vz_imo*vz_imo );
-    dge = dev_conserved[(n_fields-1)*n_cells + id];
-    p_imo = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma );
-    #else
-    p_imo  = (dev_conserved[4*n_cells + id] - 0.5*d_imo*(vx_imo*vx_imo + vy_imo*vy_imo + vz_imo*vz_imo)) * (gamma - 1.0);
-    #endif //PRESSURE_DE
-    p_imo  = fmax(p_imo, (Real) TINY_NUMBER);
-    #ifdef DE
+    if (dir == 0) id = xid - 1 + yid * nx + zid * nx * ny;
+    if (dir == 1) id = xid + (yid - 1) * nx + zid * nx * ny;
+    if (dir == 2) id = xid + yid * nx + (zid - 1) * nx * ny;
+    d_imo  = dev_conserved[id];
+    vx_imo = dev_conserved[o1 * n_cells + id] / d_imo;
+    vy_imo = dev_conserved[o2 * n_cells + id] / d_imo;
+    vz_imo = dev_conserved[o3 * n_cells + id] / d_imo;
+  #ifdef DE  // PRESSURE_DE
+    E     = dev_conserved[4 * n_cells + id];
+    E_kin = 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo);
+    dge   = dev_conserved[(n_fields - 1) * n_cells + id];
+    p_imo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma);
+  #else
+    p_imo = (dev_conserved[4 * n_cells + id] - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) *
+            (gamma - 1.0);
+  #endif  // PRESSURE_DE
+    p_imo = fmax(p_imo, (Real)TINY_NUMBER);
+  #ifdef DE
     ge_imo = dge / d_imo;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_imo[i]  =  dev_conserved[(5+i)*n_cells + id] / d_imo;
+  #endif  // DE
+  #ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      scalar_imo[i] = dev_conserved[(5 + i) * n_cells + id] / d_imo;
     }
-    #endif
+  #endif  // SCALAR
     // cell i+1
-    if (dir == 0) id = xid+1 + yid*nx + zid*nx*ny;
-    if (dir == 1) id = xid + (yid+1)*nx + zid*nx*ny;
-    if (dir == 2) id = xid + yid*nx + (zid+1)*nx*ny;
-    d_ipo  =  dev_conserved[            id];
-    vx_ipo =  dev_conserved[o1*n_cells + id] / d_ipo;
-    vy_ipo =  dev_conserved[o2*n_cells + id] / d_ipo;
-    vz_ipo =  dev_conserved[o3*n_cells + id] / d_ipo;
-    #ifdef DE //PRESSURE_DE
-    E = dev_conserved[4*n_cells + id];
-    E_kin = 0.5 * d_ipo * ( vx_ipo*vx_ipo + vy_ipo*vy_ipo + vz_ipo*vz_ipo );
-    dge = dev_conserved[(n_fields-1)*n_cells + id];
-    p_ipo = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma );
-    #else
-    p_ipo  = (dev_conserved[4*n_cells + id] - 0.5*d_ipo*(vx_ipo*vx_ipo + vy_ipo*vy_ipo + vz_ipo*vz_ipo)) * (gamma - 1.0);
-    #endif //PRESSURE_DE
-    p_ipo  = fmax(p_ipo, (Real) TINY_NUMBER);
-    #ifdef DE
+    if (dir == 0) id = xid + 1 + yid * nx + zid * nx * ny;
+    if (dir == 1) id = xid + (yid + 1) * nx + zid * nx * ny;
+    if (dir == 2) id = xid + yid * nx + (zid + 1) * nx * ny;
+    d_ipo  = dev_conserved[id];
+    vx_ipo = dev_conserved[o1 * n_cells + id] / d_ipo;
+    vy_ipo = dev_conserved[o2 * n_cells + id] / d_ipo;
+    vz_ipo = dev_conserved[o3 * n_cells + id] / d_ipo;
+  #ifdef DE  // PRESSURE_DE
+    E     = dev_conserved[4 * n_cells + id];
+    E_kin = 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo);
+    dge   = dev_conserved[(n_fields - 1) * n_cells + id];
+    p_ipo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma);
+  #else
+    p_ipo = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) *
+            (gamma - 1.0);
+  #endif  // PRESSURE_DE
+    p_ipo = fmax(p_ipo, (Real)TINY_NUMBER);
+  #ifdef DE
     ge_ipo = dge / d_ipo;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_ipo[i]  =  dev_conserved[(5+i)*n_cells + id] / d_ipo;
+  #endif  // DE
+  #ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      scalar_ipo[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipo;
     }
-    #endif
+  #endif  // SCALAR
     // cell i-2
-    if (dir == 0) id = xid-2 + yid*nx + zid*nx*ny;
-    if (dir == 1) id = xid + (yid-2)*nx + zid*nx*ny;
-    if (dir == 2) id = xid + yid*nx + (zid-2)*nx*ny;
-    d_imt  =  dev_conserved[            id];
-    vx_imt =  dev_conserved[o1*n_cells + id] / d_imt;
-    vy_imt =  dev_conserved[o2*n_cells + id] / d_imt;
-    vz_imt =  dev_conserved[o3*n_cells + id] / d_imt;
-    #ifdef DE //PRESSURE_DE
-    E = dev_conserved[4*n_cells + id];
-    E_kin = 0.5 * d_imt * ( vx_imt*vx_imt + vy_imt*vy_imt + vz_imt*vz_imt );
-    dge = dev_conserved[(n_fields-1)*n_cells + id];
-    p_imt = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma );
-    #else
-    p_imt  = (dev_conserved[4*n_cells + id] - 0.5*d_imt*(vx_imt*vx_imt + vy_imt*vy_imt + vz_imt*vz_imt)) * (gamma - 1.0);
-    #endif //PRESSURE_DE
-    p_imt  = fmax(p_imt, (Real) TINY_NUMBER);
-    #ifdef DE
+    if (dir == 0) id = xid - 2 + yid * nx + zid * nx * ny;
+    if (dir == 1) id = xid + (yid - 2) * nx + zid * nx * ny;
+    if (dir == 2) id = xid + yid * nx + (zid - 2) * nx * ny;
+    d_imt  = dev_conserved[id];
+    vx_imt = dev_conserved[o1 * n_cells + id] / d_imt;
+    vy_imt = dev_conserved[o2 * n_cells + id] / d_imt;
+    vz_imt = dev_conserved[o3 * n_cells + id] / d_imt;
+  #ifdef DE  // PRESSURE_DE
+    E     = dev_conserved[4 * n_cells + id];
+    E_kin = 0.5 * d_imt * (vx_imt * vx_imt + vy_imt * vy_imt + vz_imt * vz_imt);
+    dge   = dev_conserved[(n_fields - 1) * n_cells + id];
+    p_imt = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma);
+  #else
+    p_imt = (dev_conserved[4 * n_cells + id] - 0.5 * d_imt * (vx_imt * vx_imt + vy_imt * vy_imt + vz_imt * vz_imt)) *
+            (gamma - 1.0);
+  #endif  // PRESSURE_DE
+    p_imt = fmax(p_imt, (Real)TINY_NUMBER);
+  #ifdef DE
     ge_imt = dge / d_imt;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_imt[i]  =  dev_conserved[(5+i)*n_cells + id] / d_imt;
+  #endif  // DE
+  #ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      scalar_imt[i] = dev_conserved[(5 + i) * n_cells + id] / d_imt;
     }
-    #endif
+  #endif  // SCALAR
     // cell i+2
-    if (dir == 0) id = xid+2 + yid*nx + zid*nx*ny;
-    if (dir == 1) id = xid + (yid+2)*nx + zid*nx*ny;
-    if (dir == 2) id = xid + yid*nx + (zid+2)*nx*ny;
-    d_ipt  =  dev_conserved[            id];
-    vx_ipt =  dev_conserved[o1*n_cells + id] / d_ipt;
-    vy_ipt =  dev_conserved[o2*n_cells + id] / d_ipt;
-    vz_ipt =  dev_conserved[o3*n_cells + id] / d_ipt;
-    #ifdef DE //PRESSURE_DE
-    E = dev_conserved[4*n_cells + id];
-    E_kin = 0.5 * d_ipt * ( vx_ipt*vx_ipt + vy_ipt*vy_ipt + vz_ipt*vz_ipt );
-    dge = dev_conserved[(n_fields-1)*n_cells + id];
-    p_ipt = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma );
-    #else
-    p_ipt  = (dev_conserved[4*n_cells + id] - 0.5*d_ipt*(vx_ipt*vx_ipt + vy_ipt*vy_ipt + vz_ipt*vz_ipt)) * (gamma - 1.0);
-    #endif //PRESSURE_DE
-    p_ipt  = fmax(p_ipt, (Real) TINY_NUMBER);
-    #ifdef DE
+    if (dir == 0) id = xid + 2 + yid * nx + zid * nx * ny;
+    if (dir == 1) id = xid + (yid + 2) * nx + zid * nx * ny;
+    if (dir == 2) id = xid + yid * nx + (zid + 2) * nx * ny;
+    d_ipt  = dev_conserved[id];
+    vx_ipt = dev_conserved[o1 * n_cells + id] / d_ipt;
+    vy_ipt = dev_conserved[o2 * n_cells + id] / d_ipt;
+    vz_ipt = dev_conserved[o3 * n_cells + id] / d_ipt;
+  #ifdef DE  // PRESSURE_DE
+    E     = dev_conserved[4 * n_cells + id];
+    E_kin = 0.5 * d_ipt * (vx_ipt * vx_ipt + vy_ipt * vy_ipt + vz_ipt * vz_ipt);
+    dge   = dev_conserved[(n_fields - 1) * n_cells + id];
+    p_ipt = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma);
+  #else
+    p_ipt = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipt * (vx_ipt * vx_ipt + vy_ipt * vy_ipt + vz_ipt * vz_ipt)) *
+            (gamma - 1.0);
+  #endif  // PRESSURE_DE
+    p_ipt = fmax(p_ipt, (Real)TINY_NUMBER);
+  #ifdef DE
     ge_ipt = dge / d_ipt;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_ipt[i]  =  dev_conserved[(5+i)*n_cells + id] / d_ipt;
+  #endif  // DE
+  #ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      scalar_ipt[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipt;
     }
-    #endif
-    #ifdef FLATTENING
+  #endif  // SCALAR
+  #ifdef FLATTENING
     // cell i-3
-    if (dir == 0) id = xid-3 + yid*nx + zid*nx*ny;
-    if (dir == 1) id = xid + (yid-3)*nx + zid*nx*ny;
-    if (dir == 2) id = xid + yid*nx + (zid-3)*nx*ny;
-    p_imth = (dev_conserved[4*n_cells + id] - 0.5*
-             (dev_conserved[o1*n_cells + id]*dev_conserved[o1*n_cells + id] +
-              dev_conserved[o2*n_cells + id]*dev_conserved[o2*n_cells + id] +
-              dev_conserved[o3*n_cells + id]*dev_conserved[o3*n_cells + id]) / dev_conserved[id]) * (gamma - 1.0);
-    p_imth = fmax(p_imth, (Real) TINY_NUMBER);
+    if (dir == 0) id = xid - 3 + yid * nx + zid * nx * ny;
+    if (dir == 1) id = xid + (yid - 3) * nx + zid * nx * ny;
+    if (dir == 2) id = xid + yid * nx + (zid - 3) * nx * ny;
+    p_imth =
+        (dev_conserved[4 * n_cells + id] - 0.5 *
+                                               (dev_conserved[o1 * n_cells + id] * dev_conserved[o1 * n_cells + id] +
+                                                dev_conserved[o2 * n_cells + id] * dev_conserved[o2 * n_cells + id] +
+                                                dev_conserved[o3 * n_cells + id] * dev_conserved[o3 * n_cells + id]) /
+                                               dev_conserved[id]) *
+        (gamma - 1.0);
+    p_imth = fmax(p_imth, (Real)TINY_NUMBER);
     // cell i+3
-    if (dir == 0) id = xid+3 + yid*nx + zid*nx*ny;
-    if (dir == 1) id = xid + (yid+3)*nx + zid*nx*ny;
-    if (dir == 2) id = xid + yid*nx + (zid+3)*nx*ny;
-    p_ipth = (dev_conserved[4*n_cells + id] - 0.5*
-             (dev_conserved[o1*n_cells + id]*dev_conserved[o1*n_cells + id] +
-              dev_conserved[o2*n_cells + id]*dev_conserved[o2*n_cells + id] +
-              dev_conserved[o3*n_cells + id]*dev_conserved[o3*n_cells + id]) / dev_conserved[id]) * (gamma - 1.0);
-    p_ipth = fmax(p_imth, (Real) TINY_NUMBER);
-    #endif //FLATTENING
-
-    //use ppm routines to set cell boundary values (see Fryxell Sec. 3.1.1)
+    if (dir == 0) id = xid + 3 + yid * nx + zid * nx * ny;
+    if (dir == 1) id = xid + (yid + 3) * nx + zid * nx * ny;
+    if (dir == 2) id = xid + yid * nx + (zid + 3) * nx * ny;
+    p_ipth =
+        (dev_conserved[4 * n_cells + id] - 0.5 *
+                                               (dev_conserved[o1 * n_cells + id] * dev_conserved[o1 * n_cells + id] +
+                                                dev_conserved[o2 * n_cells + id] * dev_conserved[o2 * n_cells + id] +
+                                                dev_conserved[o3 * n_cells + id] * dev_conserved[o3 * n_cells + id]) /
+                                               dev_conserved[id]) *
+        (gamma - 1.0);
+    p_ipth = fmax(p_imth, (Real)TINY_NUMBER);
+  #endif  // FLATTENING
+
+    // use ppm routines to set cell boundary values (see Fryxell Sec. 3.1.1)
 
     // Calculate the monotonized slopes for cells imo, i, ipo (density)
     del_q_imo = Calculate_Slope(d_imt, d_imo, d_i);
-    del_q_i   = Calculate_Slope(d_imo, d_i,   d_ipo);
-    del_q_ipo = Calculate_Slope(d_i,   d_ipo, d_ipt);
+    del_q_i   = Calculate_Slope(d_imo, d_i, d_ipo);
+    del_q_ipo = Calculate_Slope(d_i, d_ipo, d_ipt);
 
     // Calculate the interface values for density
-    Interface_Values_PPM(d_imo,  d_i,  d_ipo,  del_q_imo, del_q_i, del_q_ipo, &d_L,  &d_R);
+    Interface_Values_PPM(d_imo, d_i, d_ipo, del_q_imo, del_q_i, del_q_ipo, &d_L, &d_R);
 
     // Calculate the monotonized slopes for cells imo, i, ipo (x-velocity)
     del_q_imo = Calculate_Slope(vx_imt, vx_imo, vx_i);
-    del_q_i   = Calculate_Slope(vx_imo, vx_i,   vx_ipo);
-    del_q_ipo = Calculate_Slope(vx_i,   vx_ipo, vx_ipt);
+    del_q_i   = Calculate_Slope(vx_imo, vx_i, vx_ipo);
+    del_q_ipo = Calculate_Slope(vx_i, vx_ipo, vx_ipt);
 
     // Calculate the interface values for x-velocity
     Interface_Values_PPM(vx_imo, vx_i, vx_ipo, del_q_imo, del_q_i, del_q_ipo, &vx_L, &vx_R);
 
     // Calculate the monotonized slopes for cells imo, i, ipo (y-velocity)
     del_q_imo = Calculate_Slope(vy_imt, vy_imo, vy_i);
-    del_q_i   = Calculate_Slope(vy_imo, vy_i,   vy_ipo);
-    del_q_ipo = Calculate_Slope(vy_i,   vy_ipo, vy_ipt);
+    del_q_i   = Calculate_Slope(vy_imo, vy_i, vy_ipo);
+    del_q_ipo = Calculate_Slope(vy_i, vy_ipo, vy_ipt);
 
     // Calculate the interface values for y-velocity
     Interface_Values_PPM(vy_imo, vy_i, vy_ipo, del_q_imo, del_q_i, del_q_ipo, &vy_L, &vy_R);
 
     // Calculate the monotonized slopes for cells imo, i, ipo (z-velocity)
     del_q_imo = Calculate_Slope(vz_imt, vz_imo, vz_i);
-    del_q_i   = Calculate_Slope(vz_imo, vz_i,   vz_ipo);
-    del_q_ipo = Calculate_Slope(vz_i,   vz_ipo, vz_ipt);
+    del_q_i   = Calculate_Slope(vz_imo, vz_i, vz_ipo);
+    del_q_ipo = Calculate_Slope(vz_i, vz_ipo, vz_ipt);
 
     // Calculate the interface values for z-velocity
     Interface_Values_PPM(vz_imo, vz_i, vz_ipo, del_q_imo, del_q_i, del_q_ipo, &vz_L, &vz_R);
 
     // Calculate the monotonized slopes for cells imo, i, ipo (pressure)
     del_q_imo = Calculate_Slope(p_imt, p_imo, p_i);
-    del_q_i   = Calculate_Slope(p_imo, p_i,   p_ipo);
-    del_q_ipo = Calculate_Slope(p_i,   p_ipo, p_ipt);
+    del_q_i   = Calculate_Slope(p_imo, p_i, p_ipo);
+    del_q_ipo = Calculate_Slope(p_i, p_ipo, p_ipt);
 
     // Calculate the interface values for pressure
-    Interface_Values_PPM(p_imo,  p_i,  p_ipo,  del_q_imo, del_q_i, del_q_ipo, &p_L,  &p_R);
+    Interface_Values_PPM(p_imo, p_i, p_ipo, del_q_imo, del_q_i, del_q_ipo, &p_L, &p_R);
 
-    #ifdef DE
+  #ifdef DE
     // Calculate the monotonized slopes for cells imo, i, ipo (internal energy)
     del_q_imo = Calculate_Slope(ge_imt, ge_imo, ge_i);
-    del_q_i   = Calculate_Slope(ge_imo, ge_i,   ge_ipo);
-    del_q_ipo = Calculate_Slope(ge_i,   ge_ipo, ge_ipt);
+    del_q_i   = Calculate_Slope(ge_imo, ge_i, ge_ipo);
+    del_q_ipo = Calculate_Slope(ge_i, ge_ipo, ge_ipt);
 
     // Calculate the interface values for internal energy
-    Interface_Values_PPM(ge_imo,  ge_i,  ge_ipo,  del_q_imo, del_q_i, del_q_ipo, &ge_L,  &ge_R);
-    #endif
+    Interface_Values_PPM(ge_imo, ge_i, ge_ipo, del_q_imo, del_q_i, del_q_ipo, &ge_L, &ge_R);
+  #endif  // DE
 
-    #ifdef SCALAR
+  #ifdef SCALAR
     // Calculate the monotonized slopes for cells imo, i, ipo (passive scalars)
-    for (int i=0; i<NSCALARS; i++) {
+    for (int i = 0; i < NSCALARS; i++) {
       del_q_imo = Calculate_Slope(scalar_imt[i], scalar_imo[i], scalar_i[i]);
-      del_q_i   = Calculate_Slope(scalar_imo[i], scalar_i[i],   scalar_ipo[i]);
-      del_q_ipo = Calculate_Slope(scalar_i[i],   scalar_ipo[i], scalar_ipt[i]);
+      del_q_i   = Calculate_Slope(scalar_imo[i], scalar_i[i], scalar_ipo[i]);
+      del_q_ipo = Calculate_Slope(scalar_i[i], scalar_ipo[i], scalar_ipt[i]);
 
       // Calculate the interface values for the passive scalars
-      Interface_Values_PPM(scalar_imo[i],  scalar_i[i],  scalar_ipo[i],  del_q_imo, del_q_i, del_q_ipo, &scalar_L[i],  &scalar_R[i]);
+      Interface_Values_PPM(scalar_imo[i], scalar_i[i], scalar_ipo[i], del_q_imo, del_q_i, del_q_ipo, &scalar_L[i],
+                           &scalar_R[i]);
     }
-    #endif
+  #endif  // SCALAR
 
-#ifdef STEEPENING
+  #ifdef STEEPENING
     Real d2_rho_imo, d2_rho_ipo, eta_i;
-    //check for contact discontinuities & steepen if necessary (see Fryxell Sec 3.1.2)
-    //if condition 4 (Fryxell Eqn 37) (Colella Eqn 1.16.5) is true, check further conditions, otherwise do nothing
-    if ((fabs(d_ipo - d_imo) / fmin(d_ipo, d_imo)) > 0.01)
-    {
-      //calculate the second derivative of the density in the imo and ipo cells
+    // check for contact discontinuities & steepen if necessary (see Fryxell
+    // Sec 3.1.2) if condition 4 (Fryxell Eqn 37) (Colella Eqn 1.16.5) is true,
+    // check further conditions, otherwise do nothing
+    if ((fabs(d_ipo - d_imo) / fmin(d_ipo, d_imo)) > 0.01) {
+      // calculate the second derivative of the density in the imo and ipo cells
       d2_rho_imo = calc_d2_rho(d_imt, d_imo, d_i, dx);
       d2_rho_ipo = calc_d2_rho(d_i, d_ipo, d_ipt, dx);
-      //if condition 1 (Fryxell Eqn 38) (Colella Eqn 1.16.5) is true, check further conditions, otherwise do nothing
-      if ((d2_rho_imo * d2_rho_ipo) < 0)
-      {
-        //calculate condition 5, pressure vs density jumps (Fryxell Eqn 39) (Colella Eqn 3.2)
-        //if c5 is true, set value of eta for discontinuity steepening
-        if ((fabs(p_ipo - p_imo) / fmin(p_ipo, p_imo)) < 0.1 * gamma * (fabs(d_ipo - d_imo) / fmin(d_ipo, d_imo)))
-        {
-          //calculate first eta value (Fryxell Eqn 36) (Colella Eqn 1.16.5)
+      // if condition 1 (Fryxell Eqn 38) (Colella Eqn 1.16.5) is true, check
+      // further conditions, otherwise do nothing
+      if ((d2_rho_imo * d2_rho_ipo) < 0) {
+        // calculate condition 5, pressure vs density jumps (Fryxell Eqn 39)
+        // (Colella Eqn 3.2) if c5 is true, set value of eta for discontinuity
+        // steepening
+        if ((fabs(p_ipo - p_imo) / fmin(p_ipo, p_imo)) < 0.1 * gamma * (fabs(d_ipo - d_imo) / fmin(d_ipo, d_imo))) {
+          // calculate first eta value (Fryxell Eqn 36) (Colella Eqn 1.16.5)
           eta_i = calc_eta(d2_rho_imo, d2_rho_ipo, dx, d_imo, d_ipo);
-          //calculate steepening coefficient (Fryxell Eqn 40) (Colella Eqn 1.16)
-          eta_i = fmax(0, fmin(20*(eta_i-0.05), 1) );
+          // calculate steepening coefficient (Fryxell Eqn 40) (Colella
+          // Eqn 1.16)
+          eta_i = fmax(0, fmin(20 * (eta_i - 0.05), 1));
 
-          //calculate new left and right interface variables using monotonized slopes
+          // calculate new left and right interface variables using monotonized
+          // slopes
           del_q_imo = Calculate_Slope(d_imt, d_imo, d_i);
           del_q_ipo = Calculate_Slope(d_i, d_ipo, d_ipt);
 
-          //replace left and right interface values of density (Colella Eqn 1.14, 1.15)
-          d_L = d_L*(1-eta_i) + (d_imo + 0.5 * del_q_imo) * eta_i;
-          d_R = d_R*(1-eta_i) + (d_ipo - 0.5 * del_q_ipo) * eta_i;
+          // replace left and right interface values of density (Colella
+          // Eqn 1.14, 1.15)
+          d_L = d_L * (1 - eta_i) + (d_imo + 0.5 * del_q_imo) * eta_i;
+          d_R = d_R * (1 - eta_i) + (d_ipo - 0.5 * del_q_ipo) * eta_i;
         }
       }
     }
-#endif
+  #endif  // STEEPENING
 
-#ifdef FLATTENING
+  #ifdef FLATTENING
     Real F_imo, F_i, F_ipo;
-    //flatten shock fronts that are too narrow (see Fryxell Sec 3.1.3)
-    //calculate the shock steepness parameters (Fryxell Eqn 43)
-    //calculate the dimensionless flattening coefficients (Fryxell Eqn 45)
-    F_imo = fmax( 0, fmin(1, 10*(( (p_i -   p_imt) / (p_ipo - p_imth)) - 0.75)) );
-    F_i   = fmax( 0, fmin(1, 10*(( (p_ipo - p_imo) / (p_ipt - p_imt))  - 0.75)) );
-    F_ipo = fmax( 0, fmin(1, 10*(( (p_ipt - p_i)   / (p_ipth - p_imo)) - 0.75)) );
-    //ensure that we are encountering a shock (Fryxell Eqns 46 & 47)
-    if (fabs(p_i - p_imt) / fmin(p_i, p_imt) < 1./3.)  {F_imo = 0;}
-    if (fabs(p_ipo - p_imo) / fmin(p_ipo, p_imo) < 1./3.)  {F_i = 0;}
-    if (fabs(p_ipt - p_i) / fmin(p_ipt, p_i) < 1./3.)  {F_ipo = 0;}
-    if (vx_i   - vx_imt > 0) {F_imo = 0;}
-    if (vx_ipo - vx_imo > 0) {F_i   = 0;}
-    if (vx_ipt - vx_i   > 0) {F_ipo = 0;}
-    //set the flattening coefficient (Fryxell Eqn 48)
-    if (p_ipo - p_imo < 0) {F_i = fmax(F_i, F_ipo);}
-    else {F_i = fmax(F_i, F_imo);}
-    //modify the interface values
-    d_L  = F_i * d_i  + (1 - F_i) * d_L;
+    // flatten shock fronts that are too narrow (see Fryxell Sec 3.1.3)
+    // calculate the shock steepness parameters (Fryxell Eqn 43)
+    // calculate the dimensionless flattening coefficients (Fryxell Eqn 45)
+    F_imo = fmax(0, fmin(1, 10 * (((p_i - p_imt) / (p_ipo - p_imth)) - 0.75)));
+    F_i   = fmax(0, fmin(1, 10 * (((p_ipo - p_imo) / (p_ipt - p_imt)) - 0.75)));
+    F_ipo = fmax(0, fmin(1, 10 * (((p_ipt - p_i) / (p_ipth - p_imo)) - 0.75)));
+    // ensure that we are encountering a shock (Fryxell Eqns 46 & 47)
+    if (fabs(p_i - p_imt) / fmin(p_i, p_imt) < 1. / 3.) {
+      F_imo = 0;
+    }
+    if (fabs(p_ipo - p_imo) / fmin(p_ipo, p_imo) < 1. / 3.) {
+      F_i = 0;
+    }
+    if (fabs(p_ipt - p_i) / fmin(p_ipt, p_i) < 1. / 3.) {
+      F_ipo = 0;
+    }
+    if (vx_i - vx_imt > 0) {
+      F_imo = 0;
+    }
+    if (vx_ipo - vx_imo > 0) {
+      F_i = 0;
+    }
+    if (vx_ipt - vx_i > 0) {
+      F_ipo = 0;
+    }
+    // set the flattening coefficient (Fryxell Eqn 48)
+    if (p_ipo - p_imo < 0) {
+      F_i = fmax(F_i, F_ipo);
+    } else {
+      F_i = fmax(F_i, F_imo);
+    }
+    // modify the interface values
+    d_L  = F_i * d_i + (1 - F_i) * d_L;
     vx_L = F_i * vx_i + (1 - F_i) * vx_L;
     vy_L = F_i * vy_i + (1 - F_i) * vy_L;
     vz_L = F_i * vz_i + (1 - F_i) * vz_L;
-    p_L  = F_i * p_i  + (1 - F_i) * p_L;
+    p_L  = F_i * p_i + (1 - F_i) * p_L;
     #ifdef DE
     ge_L = F_i * ge_i + (1 - F_i) * ge_L;
-    #endif
+    #endif  // DE
     #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
+    for (int i = 0; i < NSCALARS; i++) {
       scalar_L[i] = F_i * scalar_i[i] + (1 - F_i) * scalar_L[i];
     }
-    #endif
-    d_R  = F_i * d_i  + (1 - F_i) * d_R;
+    #endif  // SCALAR
+    d_R  = F_i * d_i + (1 - F_i) * d_R;
     vx_R = F_i * vx_i + (1 - F_i) * vx_R;
     vy_R = F_i * vy_i + (1 - F_i) * vy_R;
     vz_R = F_i * vz_i + (1 - F_i) * vz_R;
-    p_R  = F_i * p_i  + (1 - F_i) * p_R;
+    p_R  = F_i * p_i + (1 - F_i) * p_R;
     #ifdef DE
     ge_R = F_i * ge_i + (1 - F_i) * ge_R;
-    #endif
+    #endif  // DE
     #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
+    for (int i = 0; i < NSCALARS; i++) {
       scalar_R[i] = F_i * scalar_i[i] + (1 - F_i) * scalar_R[i];
     }
-    #endif
-#endif
+    #endif  // SCALAR
+  #endif    // FLATTENING
 
-#ifndef VL
-//#ifdef CTU
-    // compute sound speed in cell i
+  #ifndef VL
+    // #ifdef CTU
+    //  compute sound speed in cell i
     cs = sqrt(gamma * p_i / d_i);
 
     // compute a first guess at the left and right states by taking the average
     // under the characteristic on each side that has the largest speed
 
     // recompute slope across cell for each variable Fryxell Eqn 29
-    del_d  = d_R  - d_L;
+    del_d  = d_R - d_L;
     del_vx = vx_R - vx_L;
     del_vy = vy_R - vy_L;
     del_vz = vz_R - vz_L;
-    del_p  = p_R  - p_L;
+    del_p  = p_R - p_L;
     #ifdef DE
     del_ge = ge_R - ge_L;
-    #endif
+    #endif  // DE
     #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
+    for (int i = 0; i < NSCALARS; i++) {
       del_scalar[i] = scalar_R[i] - scalar_L[i];
     }
-    #endif
+    #endif  // SCALAR
 
-    d_6  = 6.0 * (d_i  - 0.5*(d_L  + d_R));  // Fryxell Eqn 30
-    vx_6 = 6.0 * (vx_i - 0.5*(vx_L + vx_R)); // Fryxell Eqn 30
-    vy_6 = 6.0 * (vy_i - 0.5*(vy_L + vy_R)); // Fryxell Eqn 30
-    vz_6 = 6.0 * (vz_i - 0.5*(vz_L + vz_R)); // Fryxell Eqn 30
-    p_6  = 6.0 * (p_i  - 0.5*(p_L  + p_R));  // Fryxell Eqn 30
+    d_6  = 6.0 * (d_i - 0.5 * (d_L + d_R));     // Fryxell Eqn 30
+    vx_6 = 6.0 * (vx_i - 0.5 * (vx_L + vx_R));  // Fryxell Eqn 30
+    vy_6 = 6.0 * (vy_i - 0.5 * (vy_L + vy_R));  // Fryxell Eqn 30
+    vz_6 = 6.0 * (vz_i - 0.5 * (vz_L + vz_R));  // Fryxell Eqn 30
+    p_6  = 6.0 * (p_i - 0.5 * (p_L + p_R));     // Fryxell Eqn 30
     #ifdef DE
-    ge_6 = 6.0 * (ge_i - 0.5*(ge_L + ge_R)); // Fryxell Eqn 30
-    #endif
+    ge_6 = 6.0 * (ge_i - 0.5 * (ge_L + ge_R));  // Fryxell Eqn 30
+    #endif                                      // DE
     #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalar_6[i] = 6.0 * (scalar_i[i] - 0.5*(scalar_L[i] + scalar_R[i])); // Fryxell Eqn 30
+    for (int i = 0; i < NSCALARS; i++) {
+      scalar_6[i] = 6.0 * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i]));  // Fryxell Eqn 30
     }
-    #endif
+    #endif  // SCALAR
 
-    // set speed of characteristics (v-c, v, v+c) using average values of v and c
+    // set speed of characteristics (v-c, v, v+c) using average values of v and
+    // c
     lambda_m = vx_i - cs;
     lambda_0 = vx_i;
     lambda_p = vx_i + cs;
 
     // calculate betas (for left state guesses)
-    beta_m = fmax( (lambda_m * dt / dx) , 0.0 ); // Fryxell Eqn 59
-    beta_0 = fmax( (lambda_0 * dt / dx) , 0.0); // Fryxell Eqn 59
-    beta_p = fmax( (lambda_p * dt / dx) , 0.0 ); // Fryxell Eqn 59
+    beta_m = fmax((lambda_m * dt / dx), 0.0);  // Fryxell Eqn 59
+    beta_0 = fmax((lambda_0 * dt / dx), 0.0);  // Fryxell Eqn 59
+    beta_p = fmax((lambda_p * dt / dx), 0.0);  // Fryxell Eqn 59
 
-    //calculate alphas (for right state guesses)
-    alpha_m = fmax( (-lambda_m * dt / dx), 0.0); // Fryxell Eqn 61
-    alpha_0 = fmax( (-lambda_0 * dt / dx), 0.0); // Fryxell Eqn 61
-    alpha_p = fmax( (-lambda_p * dt / dx), 0.0); // Fryxell Eqn 61
+    // calculate alphas (for right state guesses)
+    alpha_m = fmax((-lambda_m * dt / dx), 0.0);  // Fryxell Eqn 61
+    alpha_0 = fmax((-lambda_0 * dt / dx), 0.0);  // Fryxell Eqn 61
+    alpha_p = fmax((-lambda_p * dt / dx), 0.0);  // Fryxell Eqn 61
 
     // average values under characteristics for left interface (Fryxell Eqn 60)
-    dL_m  = d_L  + 0.5 * alpha_m * (del_d  + d_6  * (1 - (2./3.) * alpha_m));
-    vxL_m = vx_L + 0.5 * alpha_m * (del_vx + vx_6 * (1 - (2./3.) * alpha_m));
-    pL_m  = p_L  + 0.5 * alpha_m * (del_p  + p_6  * (1 - (2./3.) * alpha_m));
-    dL_0  = d_L  + 0.5 * alpha_0 * (del_d  + d_6  * (1 - (2./3.) * alpha_0));
-    vyL_0 = vy_L + 0.5 * alpha_0 * (del_vy + vy_6 * (1 - (2./3.) * alpha_0));
-    vzL_0 = vz_L + 0.5 * alpha_0 * (del_vz + vz_6 * (1 - (2./3.) * alpha_0));
+    dL_m  = d_L + 0.5 * alpha_m * (del_d + d_6 * (1 - (2. / 3.) * alpha_m));
+    vxL_m = vx_L + 0.5 * alpha_m * (del_vx + vx_6 * (1 - (2. / 3.) * alpha_m));
+    pL_m  = p_L + 0.5 * alpha_m * (del_p + p_6 * (1 - (2. / 3.) * alpha_m));
+    dL_0  = d_L + 0.5 * alpha_0 * (del_d + d_6 * (1 - (2. / 3.) * alpha_0));
+    vyL_0 = vy_L + 0.5 * alpha_0 * (del_vy + vy_6 * (1 - (2. / 3.) * alpha_0));
+    vzL_0 = vz_L + 0.5 * alpha_0 * (del_vz + vz_6 * (1 - (2. / 3.) * alpha_0));
     #ifdef DE
-    geL_0 = ge_L + 0.5 * alpha_0 * (del_ge + ge_6 * (1 - (2./3.) * alpha_0));
-    #endif
+    geL_0 = ge_L + 0.5 * alpha_0 * (del_ge + ge_6 * (1 - (2. / 3.) * alpha_0));
+    #endif  // DE
     #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalarL_0[i] = scalar_L[i] + 0.5 * alpha_0 * (del_scalar[i] + scalar_6[i] * (1 - (2./3.) * alpha_0));
+    for (int i = 0; i < NSCALARS; i++) {
+      scalarL_0[i] = scalar_L[i] + 0.5 * alpha_0 * (del_scalar[i] + scalar_6[i] * (1 - (2. / 3.) * alpha_0));
     }
-    #endif
-    pL_0  = p_L  + 0.5 * alpha_0 * (del_p  + p_6  * (1 - (2./3.) * alpha_0));
-    vxL_p = vx_L + 0.5 * alpha_p * (del_vx + vx_6 * (1 - (2./3.) * alpha_p));
-    pL_p  = p_L  + 0.5 * alpha_p * (del_p  + p_6  * (1 - (2./3.) * alpha_p));
+    #endif  // SCALAR
+    pL_0  = p_L + 0.5 * alpha_0 * (del_p + p_6 * (1 - (2. / 3.) * alpha_0));
+    vxL_p = vx_L + 0.5 * alpha_p * (del_vx + vx_6 * (1 - (2. / 3.) * alpha_p));
+    pL_p  = p_L + 0.5 * alpha_p * (del_p + p_6 * (1 - (2. / 3.) * alpha_p));
 
     // average values under characteristics for right interface (Fryxell Eqn 58)
-    vxR_m = vx_R - 0.5 * beta_m * (del_vx - vx_6 * (1 - (2./3.) * beta_m));
-    pR_m  = p_R  - 0.5 * beta_m * (del_p  - p_6  * (1 - (2./3.) * beta_m));
-    dR_0  = d_R  - 0.5 * beta_0 * (del_d  - d_6  * (1 - (2./3.) * beta_0));
-    vyR_0 = vy_R - 0.5 * beta_0 * (del_vy - vy_6 * (1 - (2./3.) * beta_0));
-    vzR_0 = vz_R - 0.5 * beta_0 * (del_vz - vz_6 * (1 - (2./3.) * beta_0));
+    vxR_m = vx_R - 0.5 * beta_m * (del_vx - vx_6 * (1 - (2. / 3.) * beta_m));
+    pR_m  = p_R - 0.5 * beta_m * (del_p - p_6 * (1 - (2. / 3.) * beta_m));
+    dR_0  = d_R - 0.5 * beta_0 * (del_d - d_6 * (1 - (2. / 3.) * beta_0));
+    vyR_0 = vy_R - 0.5 * beta_0 * (del_vy - vy_6 * (1 - (2. / 3.) * beta_0));
+    vzR_0 = vz_R - 0.5 * beta_0 * (del_vz - vz_6 * (1 - (2. / 3.) * beta_0));
     #ifdef DE
-    geR_0 = ge_R - 0.5 * beta_0 * (del_ge - ge_6 * (1 - (2./3.) * beta_0));
-    #endif
+    geR_0 = ge_R - 0.5 * beta_0 * (del_ge - ge_6 * (1 - (2. / 3.) * beta_0));
+    #endif  // DE
     #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalarR_0[i] = scalar_R[i] - 0.5 * beta_0 * (del_scalar[i] - scalar_6[i] * (1 - (2./3.) * beta_0));
+    for (int i = 0; i < NSCALARS; i++) {
+      scalarR_0[i] = scalar_R[i] - 0.5 * beta_0 * (del_scalar[i] - scalar_6[i] * (1 - (2. / 3.) * beta_0));
     }
-    #endif
-    pR_0  = p_R  - 0.5 * beta_0 * (del_p  - p_6  * (1 - (2./3.) * beta_0));
-    dR_p  = d_R  - 0.5 * beta_p * (del_d  - d_6  * (1 - (2./3.) * beta_p));
-    vxR_p = vx_R - 0.5 * beta_p * (del_vx - vx_6 * (1 - (2./3.) * beta_p));
-    pR_p  = p_R  - 0.5 * beta_p * (del_p  - p_6  * (1 - (2./3.) * beta_p));
+    #endif  // SCALAR
+    pR_0  = p_R - 0.5 * beta_0 * (del_p - p_6 * (1 - (2. / 3.) * beta_0));
+    dR_p  = d_R - 0.5 * beta_p * (del_d - d_6 * (1 - (2. / 3.) * beta_p));
+    vxR_p = vx_R - 0.5 * beta_p * (del_vx - vx_6 * (1 - (2. / 3.) * beta_p));
+    pR_p  = p_R - 0.5 * beta_p * (del_p - p_6 * (1 - (2. / 3.) * beta_p));
 
     // as a first guess, use characteristics with the largest speeds
     // for transverse velocities, use the 0 characteristic
@@ -524,12 +572,12 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou
     p_L  = pL_m;
     #ifdef DE
     ge_L = geL_0;
-    #endif
+    #endif  // DE
     #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
+    for (int i = 0; i < NSCALARS; i++) {
       scalar_L[i] = scalarL_0[i];
     }
-    #endif
+    #endif  // SCALAR
     // right
     d_R  = dR_p;
     vx_R = vxR_p;
@@ -538,12 +586,12 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou
     p_R  = pR_p;
     #ifdef DE
     ge_R = geR_0;
-    #endif
+    #endif  // DE
     #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
+    for (int i = 0; i < NSCALARS; i++) {
       scalar_R[i] = scalarR_0[i];
     }
-    #endif
+    #endif  // SCALAR
 
     // correct these initial guesses by taking into account the number of
     // characteristics on each side of the interface
@@ -553,77 +601,87 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou
     cr = sqrt(gamma * p_R / d_L);
 
     // calculate the chi values (Fryxell Eqns 62 & 63)
-    chi_L_m =  1./(2*d_L*cl) * (vx_L - vxL_m - (p_L - pL_m)/(d_L*cl));
-    chi_L_p = -1./(2*d_L*cl) * (vx_L - vxL_p + (p_L - pL_p)/(d_L*cl));
-    chi_L_0 = (p_L - pL_0)/(d_L*d_L*cl*cl) + 1./d_L - 1./dL_0;
-    chi_R_m =  1./(2*d_R*cr) * (vx_R - vxR_m - (p_R - pR_m)/(d_R*cr));
-    chi_R_p = -1./(2*d_R*cr) * (vx_R - vxR_p + (p_R - pR_p)/(d_R*cr));
-    chi_R_0 = (p_R - pR_0)/(d_R*d_R*cr*cr) + 1./d_R - 1./dR_0;
-
-    // set chi to 0 if characteristic velocity has the wrong sign (Fryxell Eqn 64)
-    if (lambda_m >= 0) { chi_L_m = 0; }
-    if (lambda_0 >= 0) { chi_L_0 = 0; }
-    if (lambda_p >= 0) { chi_L_p = 0; }
-    if (lambda_m <= 0) { chi_R_m = 0; }
-    if (lambda_0 <= 0) { chi_R_0 = 0; }
-    if (lambda_p <= 0) { chi_R_p = 0; }
-
-    // use the chi values to correct the initial guesses and calculate final input states
-    p_L = p_L + (d_L*d_L*cl*cl) * (chi_L_p + chi_L_m);
-    vx_L = vx_L + d_L*cl * (chi_L_p - chi_L_m);
-    d_L = pow( ((1.0/d_L) - (chi_L_m + chi_L_0 + chi_L_p)) , -1);
-    p_R = p_L + (d_R*d_R*cr*cr) * (chi_R_p + chi_R_m);
-    vx_R = vx_R + d_R*cr * (chi_R_p - chi_R_m);
-    d_R = pow( ((1.0/d_R) - (chi_R_m + chi_R_0 + chi_R_p)) , -1);
-#endif //CTU
+    chi_L_m = 1. / (2 * d_L * cl) * (vx_L - vxL_m - (p_L - pL_m) / (d_L * cl));
+    chi_L_p = -1. / (2 * d_L * cl) * (vx_L - vxL_p + (p_L - pL_p) / (d_L * cl));
+    chi_L_0 = (p_L - pL_0) / (d_L * d_L * cl * cl) + 1. / d_L - 1. / dL_0;
+    chi_R_m = 1. / (2 * d_R * cr) * (vx_R - vxR_m - (p_R - pR_m) / (d_R * cr));
+    chi_R_p = -1. / (2 * d_R * cr) * (vx_R - vxR_p + (p_R - pR_p) / (d_R * cr));
+    chi_R_0 = (p_R - pR_0) / (d_R * d_R * cr * cr) + 1. / d_R - 1. / dR_0;
+
+    // set chi to 0 if characteristic velocity has the wrong sign (Fryxell Eqn
+    // 64)
+    if (lambda_m >= 0) {
+      chi_L_m = 0;
+    }
+    if (lambda_0 >= 0) {
+      chi_L_0 = 0;
+    }
+    if (lambda_p >= 0) {
+      chi_L_p = 0;
+    }
+    if (lambda_m <= 0) {
+      chi_R_m = 0;
+    }
+    if (lambda_0 <= 0) {
+      chi_R_0 = 0;
+    }
+    if (lambda_p <= 0) {
+      chi_R_p = 0;
+    }
 
+    // use the chi values to correct the initial guesses and calculate final
+    // input states
+    p_L  = p_L + (d_L * d_L * cl * cl) * (chi_L_p + chi_L_m);
+    vx_L = vx_L + d_L * cl * (chi_L_p - chi_L_m);
+    d_L  = pow(((1.0 / d_L) - (chi_L_m + chi_L_0 + chi_L_p)), -1);
+    p_R  = p_L + (d_R * d_R * cr * cr) * (chi_R_p + chi_R_m);
+    vx_R = vx_R + d_R * cr * (chi_R_p - chi_R_m);
+    d_R  = pow(((1.0 / d_R) - (chi_R_m + chi_R_0 + chi_R_p)), -1);
+  #endif  // CTU
 
     // Apply mimimum constraints
-    d_L = fmax(d_L, (Real) TINY_NUMBER);
-    d_R = fmax(d_R, (Real) TINY_NUMBER);
-    p_L = fmax(p_L, (Real) TINY_NUMBER);
-    p_R = fmax(p_R, (Real) TINY_NUMBER);
-
-    // Convert the left and right states in the primitive to the conserved variables
-    // send final values back from kernel
-    // bounds_R refers to the right side of the i-1/2 interface
-    if (dir == 0) id = xid-1 + yid*nx + zid*nx*ny;
-    if (dir == 1) id = xid + (yid-1)*nx + zid*nx*ny;
-    if (dir == 2) id = xid + yid*nx + (zid-1)*nx*ny;
-    dev_bounds_R[            id] = d_L;
-    dev_bounds_R[o1*n_cells + id] = d_L*vx_L;
-    dev_bounds_R[o2*n_cells + id] = d_L*vy_L;
-    dev_bounds_R[o3*n_cells + id] = d_L*vz_L;
-    dev_bounds_R[4*n_cells + id] = p_L/(gamma-1.0) + 0.5*d_L*(vx_L*vx_L + vy_L*vy_L + vz_L*vz_L);
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_bounds_R[(5+i)*n_cells + id] = d_L*scalar_L[i];
+    d_L = fmax(d_L, (Real)TINY_NUMBER);
+    d_R = fmax(d_R, (Real)TINY_NUMBER);
+    p_L = fmax(p_L, (Real)TINY_NUMBER);
+    p_R = fmax(p_R, (Real)TINY_NUMBER);
+
+    // Convert the left and right states in the primitive to the conserved
+    // variables send final values back from kernel bounds_R refers to the right
+    // side of the i-1/2 interface
+    if (dir == 0) id = xid - 1 + yid * nx + zid * nx * ny;
+    if (dir == 1) id = xid + (yid - 1) * nx + zid * nx * ny;
+    if (dir == 2) id = xid + yid * nx + (zid - 1) * nx * ny;
+    dev_bounds_R[id]                = d_L;
+    dev_bounds_R[o1 * n_cells + id] = d_L * vx_L;
+    dev_bounds_R[o2 * n_cells + id] = d_L * vy_L;
+    dev_bounds_R[o3 * n_cells + id] = d_L * vz_L;
+    dev_bounds_R[4 * n_cells + id]  = p_L / (gamma - 1.0) + 0.5 * d_L * (vx_L * vx_L + vy_L * vy_L + vz_L * vz_L);
+  #ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dev_bounds_R[(5 + i) * n_cells + id] = d_L * scalar_L[i];
     }
-    #endif
-    #ifdef DE
-    dev_bounds_R[(n_fields-1)*n_cells + id] = d_L*ge_L;
-    #endif
+  #endif  // SCALAR
+  #ifdef DE
+    dev_bounds_R[(n_fields - 1) * n_cells + id] = d_L * ge_L;
+  #endif  // DE
     // bounds_L refers to the left side of the i+1/2 interface
-    id = xid + yid*nx + zid*nx*ny;
-    dev_bounds_L[            id] = d_R;
-    dev_bounds_L[o1*n_cells + id] = d_R*vx_R;
-    dev_bounds_L[o2*n_cells + id] = d_R*vy_R;
-    dev_bounds_L[o3*n_cells + id] = d_R*vz_R;
-    dev_bounds_L[4*n_cells + id] = p_R/(gamma-1.0) + 0.5*d_R*(vx_R*vx_R + vy_R*vy_R + vz_R*vz_R);
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dev_bounds_L[(5+i)*n_cells + id] = d_R*scalar_R[i];
+    id                              = xid + yid * nx + zid * nx * ny;
+    dev_bounds_L[id]                = d_R;
+    dev_bounds_L[o1 * n_cells + id] = d_R * vx_R;
+    dev_bounds_L[o2 * n_cells + id] = d_R * vy_R;
+    dev_bounds_L[o3 * n_cells + id] = d_R * vz_R;
+    dev_bounds_L[4 * n_cells + id]  = p_R / (gamma - 1.0) + 0.5 * d_R * (vx_R * vx_R + vy_R * vy_R + vz_R * vz_R);
+  #ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dev_bounds_L[(5 + i) * n_cells + id] = d_R * scalar_R[i];
     }
-    #endif
-    #ifdef DE
-    dev_bounds_L[(n_fields-1)*n_cells + id] = d_R*ge_R;
-    #endif
-
+  #endif  // SCALAR
+  #ifdef DE
+    dev_bounds_L[(n_fields - 1) * n_cells + id] = d_R * ge_R;
+  #endif  // DE
   }
 }
 
-
-
 /*! \fn __device__ Real Calculate_Slope(Real q_imo, Real q_i, Real q_ipo)
  *  \brief Calculates the limited slope across a cell.*/
 __device__ Real Calculate_Slope(Real q_imo, Real q_i, Real q_ipo)
@@ -631,70 +689,76 @@ __device__ Real Calculate_Slope(Real q_imo, Real q_i, Real q_ipo)
   Real del_q_L, del_q_R, del_q_C, del_q_G;
   Real lim_slope_a, lim_slope_b, del_q_m;
 
-  // Compute the left, right, and centered differences of the primitive variables
-  // Note that here L and R refer to locations relative to the cell center
+  // Compute the left, right, and centered differences of the primitive
+  // variables Note that here L and R refer to locations relative to the cell
+  // center
 
   // left
-  del_q_L  = q_i - q_imo;
+  del_q_L = q_i - q_imo;
   // right
-  del_q_R  = q_ipo - q_i;
+  del_q_R = q_ipo - q_i;
   // centered
-  del_q_C  = 0.5*(q_ipo - q_imo);
+  del_q_C = 0.5 * (q_ipo - q_imo);
   // Van Leer
-  if (del_q_L*del_q_R > 0.0) { del_q_G = 2.0*del_q_L*del_q_R / (del_q_L+del_q_R); }
-  else { del_q_G = 0.0; }
-
+  if (del_q_L * del_q_R > 0.0) {
+    del_q_G = 2.0 * del_q_L * del_q_R / (del_q_L + del_q_R);
+  } else {
+    del_q_G = 0.0;
+  }
 
   // Monotonize the differences
   lim_slope_a = fmin(fabs(del_q_L), fabs(del_q_R));
   lim_slope_b = fmin(fabs(del_q_C), fabs(del_q_G));
 
   // Minmod limiter
-  //del_q_m = sgn_CUDA(del_q_C)*fmin(2.0*lim_slope_a, fabs(del_q_C));
+  // del_q_m = sgn_CUDA(del_q_C)*fmin(2.0*lim_slope_a, fabs(del_q_C));
 
   // Van Leer limiter
-  del_q_m = sgn_CUDA(del_q_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b);
+  del_q_m = sgn_CUDA(del_q_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b);
 
   return del_q_m;
-
 }
 
-
-/*! \fn __device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, Real del_q_imo, Real del_q_i, Real del_q_ipo, Real *q_L, Real *q_R)
- *  \brief Calculates the left and right interface values for a cell using parabolic reconstruction
-           in the primitive variables with limited slopes provided. Applies further monotonicity constraints.*/
-__device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, Real del_q_imo, Real del_q_i, Real del_q_ipo, Real *q_L, Real *q_R)
+/*! \fn __device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo,
+ Real del_q_imo, Real del_q_i, Real del_q_ipo, Real *q_L, Real *q_R)
+ *  \brief Calculates the left and right interface values for a cell using
+ parabolic reconstruction in the primitive variables with limited slopes
+ provided. Applies further monotonicity constraints.*/
+__device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, Real del_q_imo, Real del_q_i, Real del_q_ipo,
+                                     Real *q_L, Real *q_R)
 {
   // Calculate the left and right interface values using the limited slopes
-  *q_L = 0.5*(q_i + q_imo) - (1.0/6.0)*(del_q_i - del_q_imo);
-  *q_R = 0.5*(q_ipo + q_i) - (1.0/6.0)*(del_q_ipo - del_q_i);
+  *q_L = 0.5 * (q_i + q_imo) - (1.0 / 6.0) * (del_q_i - del_q_imo);
+  *q_R = 0.5 * (q_ipo + q_i) - (1.0 / 6.0) * (del_q_ipo - del_q_i);
 
-  // Apply further monotonicity constraints to ensure interface values lie between
-  // neighboring cell-centered values
+  // Apply further monotonicity constraints to ensure interface values lie
+  // between neighboring cell-centered values
 
   // local maximum or minimum criterion (Fryxell Eqn 52, Fig 11)
-  if ((*q_R - q_i)*(q_i - *q_L) <= 0) *q_L = *q_R = q_i;
+  if ((*q_R - q_i) * (q_i - *q_L) <= 0) *q_L = *q_R = q_i;
 
   // steep gradient criterion (Fryxell Eqn 53, Fig 12)
-  if (6.0*(*q_R - *q_L)*(q_i - 0.5*(*q_L + *q_R)) > (*q_R - *q_L)*(*q_R - *q_L))  *q_L = 3.0*q_i - 2.0*(*q_R);
-  if (6.0*(*q_R - *q_L)*(q_i - 0.5*(*q_L + *q_R)) < -(*q_R - *q_L)*(*q_R - *q_L)) *q_R = 3.0*q_i - 2.0*(*q_L);
-
-  *q_L  = fmax( fmin(q_i, q_imo), *q_L );
-  *q_L  = fmin( fmax(q_i, q_imo), *q_L );
-  *q_R  = fmax( fmin(q_i, q_ipo), *q_R );
-  *q_R  = fmin( fmax(q_i, q_ipo), *q_R );
+  if (6.0 * (*q_R - *q_L) * (q_i - 0.5 * (*q_L + *q_R)) > (*q_R - *q_L) * (*q_R - *q_L)) {
+    *q_L = 3.0 * q_i - 2.0 * (*q_R);
+  }
+  if (6.0 * (*q_R - *q_L) * (q_i - 0.5 * (*q_L + *q_R)) < -(*q_R - *q_L) * (*q_R - *q_L)) {
+    *q_R = 3.0 * q_i - 2.0 * (*q_L);
+  }
 
+  *q_L = fmax(fmin(q_i, q_imo), *q_L);
+  *q_L = fmin(fmax(q_i, q_imo), *q_L);
+  *q_R = fmax(fmin(q_i, q_ipo), *q_R);
+  *q_R = fmin(fmax(q_i, q_ipo), *q_R);
 }
 
-
 /*! \fn calc_d2_rho
- *  \brief Returns the second derivative of rho across zone i. (Fryxell Eqn 35) */
+ *  \brief Returns the second derivative of rho across zone i. (Fryxell Eqn 35)
+ */
 __device__ Real calc_d2_rho(Real rho_imo, Real rho_i, Real rho_ipo, Real dx)
 {
-  return (1. / (6*dx*dx)) * (rho_ipo - 2*rho_i + rho_imo);
+  return (1. / (6 * dx * dx)) * (rho_ipo - 2 * rho_i + rho_imo);
 }
 
-
 /*! \fn calc_eta
  *  \brief Returns a dimensionless quantity relating the 1st and 3rd derivatives
     See Fryxell Eqn 36. */
@@ -702,13 +766,10 @@ __device__ Real calc_eta(Real d2rho_imo, Real d2rho_ipo, Real dx, Real rho_imo,
 {
   Real A, B;
 
-  A = (d2rho_ipo - d2rho_imo)*dx*dx;
+  A = (d2rho_ipo - d2rho_imo) * dx * dx;
   B = 1.0 / (rho_ipo - rho_imo);
 
   return -A * B;
 }
 
-
-
-#endif //PPMP
-#endif //CUDA
+#endif  // PPMP
diff --git a/src/reconstruction/ppmp_cuda.h b/src/reconstruction/ppmp_cuda.h
index c8a85711e..064d328fa 100644
--- a/src/reconstruction/ppmp_cuda.h
+++ b/src/reconstruction/ppmp_cuda.h
@@ -1,30 +1,35 @@
 /*! \file ppmp_cuda.h
  *  \brief Declarations of the cuda ppmp kernels. */
 
-#ifdef CUDA
-
 #ifndef PPMP_CUDA_H
 #define PPMP_CUDA_H
 
-
 #include "../global/global.h"
 
-/*! \fn __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields)
- *  \brief When passed a stencil of conserved variables, returns the left and right
-           boundary values for the interface calculated using ppm with limiting in the primitive variables. */
-__global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields);
+/*! \fn __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real
+ *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real
+ gamma, int dir, int n_fields)
+ *  \brief When passed a stencil of conserved variables, returns the left and
+ right boundary values for the interface calculated using ppm with limiting in
+ the primitive variables. */
+__global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz,
+                          int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields);
 
 /*! \fn __device__ Real Calculate_Slope(Real q_imo, Real q_i, Real q_ipo)
  *  \brief Calculates the limited slope across a cell.*/
 __device__ Real Calculate_Slope(Real q_imo, Real q_i, Real q_ipo);
 
-/*! \fn __device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, Real *q_L, Real *q_R)
- *  \brief Calculates the left and right interface values for a cell using parabolic reconstruction
-           in the primitive variables with limited slopes provided. Applies further monotonicity constraints.*/
-__device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, Real del_q_imo, Real del_q_i, Real del_q_ipo, Real *q_L, Real *q_R);
+/*! \fn __device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo,
+ Real *q_L, Real *q_R)
+ *  \brief Calculates the left and right interface values for a cell using
+ parabolic reconstruction in the primitive variables with limited slopes
+ provided. Applies further monotonicity constraints.*/
+__device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, Real del_q_imo, Real del_q_i, Real del_q_ipo,
+                                     Real *q_L, Real *q_R);
 
 /*! \fn calc_d2_rho
- *  \brief Returns the second derivative of rho across zone i. (Fryxell Eqn 35) */
+ *  \brief Returns the second derivative of rho across zone i. (Fryxell Eqn 35)
+ */
 __device__ Real calc_d2_rho(Real rho_imo, Real rho_i, Real rho_ipo, Real dx);
 
 /*! \fn calc_eta
@@ -32,5 +37,4 @@ __device__ Real calc_d2_rho(Real rho_imo, Real rho_i, Real rho_ipo, Real dx);
     See Fryxell Eqn 36. */
 __device__ Real calc_eta(Real d2rho_imo, Real d2rho_ipo, Real dx, Real rho_imo, Real rho_ipo);
 
-#endif // PPMP_CUDA_H
-#endif // CUDA
+#endif  // PPMP_CUDA_H
diff --git a/src/reconstruction/reconstruction.h b/src/reconstruction/reconstruction.h
new file mode 100644
index 000000000..23442a776
--- /dev/null
+++ b/src/reconstruction/reconstruction.h
@@ -0,0 +1,970 @@
+/*!
+ * \file reconstruction.h
+ * \author Robert 'Bob' Caddy (rvc@pitt.edu)
+ * \brief Contain the various structs and device functions needed for interface reconstruction
+ *
+ */
+
+#pragma once
+
+// External Includes
+
+// Local Includes
+#include "../global/global.h"
+#include "../global/global_cuda.h"
+#include "../utils/cuda_utilities.h"
+#include "../utils/gpu.hpp"
+#include "../utils/hydro_utilities.h"
+#include "../utils/mhd_utilities.h"
+
+/*!
+ * \brief Namespace to contain various utilities for the interface reconstruction kernels
+ *
+ */
+namespace reconstruction
+{
+// =====================================================================================================================
+/*!
+ * \brief A struct for the primitive variables
+ *
+ */
+struct Primitive {
+  // Hydro variables
+  Real density, velocity_x, velocity_y, velocity_z, pressure;
+
+#ifdef MHD
+  // These are all cell centered values
+  Real magnetic_x, magnetic_y, magnetic_z;
+#endif  // MHD
+
+#ifdef DE
+  Real gas_energy;
+#endif  // DE
+
+#ifdef SCALAR
+  Real scalar[grid_enum::nscalars];
+#endif  // SCALAR
+};
+// =====================================================================================================================
+
+// =====================================================================================================================
+struct EigenVecs {
+  Real magnetosonic_speed_fast, magnetosonic_speed_slow, magnetosonic_speed_fast_squared,
+      magnetosonic_speed_slow_squared;
+  Real alpha_fast, alpha_slow;
+  Real beta_y, beta_z;
+  Real n_fs, sign;
+  /// The non-primed values are used in the conversion from characteristic to primitive variables
+  Real q_fast, q_slow;
+  Real a_fast, a_slow;
+  /// The primed values are used in the conversion from primitive to characteristic variables
+  Real q_prime_fast, q_prime_slow;
+  Real a_prime_fast, a_prime_slow;
+};
+// =====================================================================================================================
+
+// =====================================================================================================================
+/*!
+ * \brief A struct for the characteristic variables
+ *
+ */
+struct Characteristic {
+  // Hydro variables
+  Real a0, a1, a2, a3, a4;
+
+#ifdef MHD
+  Real a5, a6;
+#endif  // MHD
+};
+// =====================================================================================================================
+
+// =====================================================================================================================
+/*!
+ * \brief Determine if a thread is within the allowed range
+ *
+ * \tparam order The order of the reconstruction. 2 for PLM, 3 for PPM
+ * \param nx The number of cells in the X-direction
+ * \param ny The number of cells in the Y-direction
+ * \param nz The number of cells in the Z-direction
+ * \param xid The X thread index
+ * \param yid The Y thread index
+ * \param zid The Z thread index
+ * \return true The thread is NOT in the allowed range
+ * \return false The thread is in the allowed range
+ */
+template <int order>
+bool __device__ __host__ __inline__ Thread_Guard(int const &nx, int const &ny, int const &nz, int const &xid,
+                                                 int const &yid, int const &zid)
+{
+  // These checks all make sure that the xid is such that the thread won't try to load any memory that is out of bounds
+
+  // X check
+  bool out_of_bounds_thread = xid < order - 1 or xid >= nx - order;
+
+  // Y check, only used for 2D and 3D
+  if (ny > 1) {
+    out_of_bounds_thread = yid < order - 1 or yid >= ny - order or out_of_bounds_thread;
+  }
+
+  // z check, only used for 3D
+  if (nz > 1) {
+    out_of_bounds_thread = zid < order - 1 or zid >= nz - order or out_of_bounds_thread;
+  }
+  // This is needed in the case that nz == 1 to avoid overrun
+  else {
+    out_of_bounds_thread = zid >= nz or out_of_bounds_thread;
+  }
+
+  return out_of_bounds_thread;
+}
+// =====================================================================================================================
+
+// =====================================================================================================================
+/*!
+ * \brief Load the data for reconstruction
+ *
+ * \param[in] dev_conserved The conserved array
+ * \param[in] xid The xid of the cell to load data from
+ * \param[in] yid The yid of the cell to load data from
+ * \param[in] zid The zid of the cell to load data from
+ * \param[in] nx Size in the X direction
+ * \param[in] ny Size in the Y direction
+ * \param[in] n_cells The total number of cells
+ * \param[in] o1 Directional parameter
+ * \param[in] o2 Directional parameter
+ * \param[in] o3 Directional parameter
+ * \param[in] gamma The adiabatic index
+ * \return Primitive The loaded cell data
+ */
+Primitive __device__ __host__ __inline__ Load_Data(Real const *dev_conserved, size_t const &xid, size_t const &yid,
+                                                   size_t const &zid, size_t const &nx, size_t const &ny,
+                                                   size_t const &n_cells, size_t const &o1, size_t const &o2,
+                                                   size_t const &o3, Real const &gamma)
+{  // Compute index
+  size_t const id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny);
+
+  // Declare the variable we will return
+  Primitive loaded_data;
+
+  // Load hydro variables except pressure
+  loaded_data.density    = dev_conserved[grid_enum::density * n_cells + id];
+  loaded_data.velocity_x = dev_conserved[o1 * n_cells + id] / loaded_data.density;
+  loaded_data.velocity_y = dev_conserved[o2 * n_cells + id] / loaded_data.density;
+  loaded_data.velocity_z = dev_conserved[o3 * n_cells + id] / loaded_data.density;
+
+  // Load MHD variables. Note that I only need the centered values for the transverse fields except for the initial
+  // computation of the primitive variables
+#ifdef MHD
+  auto magnetic_centered = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny);
+  switch (o1) {
+    case grid_enum::momentum_x:
+      loaded_data.magnetic_x = magnetic_centered.x;
+      loaded_data.magnetic_y = magnetic_centered.y;
+      loaded_data.magnetic_z = magnetic_centered.z;
+      break;
+    case grid_enum::momentum_y:
+      loaded_data.magnetic_x = magnetic_centered.y;
+      loaded_data.magnetic_y = magnetic_centered.z;
+      loaded_data.magnetic_z = magnetic_centered.x;
+      break;
+    case grid_enum::momentum_z:
+      loaded_data.magnetic_x = magnetic_centered.z;
+      loaded_data.magnetic_y = magnetic_centered.x;
+      loaded_data.magnetic_z = magnetic_centered.y;
+      break;
+  }
+#endif  // MHD
+
+// Load pressure accounting for duel energy if enabled
+#ifdef DE  // DE
+  Real const E          = dev_conserved[grid_enum::Energy * n_cells + id];
+  Real const gas_energy = dev_conserved[grid_enum::GasEnergy * n_cells + id];
+
+  Real E_non_thermal = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(
+      loaded_data.density, loaded_data.velocity_x, loaded_data.velocity_y, loaded_data.velocity_z);
+
+  #ifdef MHD
+  E_non_thermal += mhd::utils::computeMagneticEnergy(magnetic_centered.x, magnetic_centered.y, magnetic_centered.z);
+  #endif  // MHD
+
+  loaded_data.pressure   = hydro_utilities::Get_Pressure_From_DE(E, E - E_non_thermal, gas_energy, gamma);
+  loaded_data.gas_energy = gas_energy / loaded_data.density;
+#else  // not DE
+  #ifdef MHD
+  loaded_data.pressure = hydro_utilities::Calc_Pressure_Primitive(
+      dev_conserved[grid_enum::Energy * n_cells + id], loaded_data.density, loaded_data.velocity_x,
+      loaded_data.velocity_y, loaded_data.velocity_z, gamma, loaded_data.magnetic_x, loaded_data.magnetic_y,
+      loaded_data.magnetic_z);
+  #else   // not MHD
+  loaded_data.pressure = hydro_utilities::Calc_Pressure_Primitive(
+      dev_conserved[grid_enum::Energy * n_cells + id], loaded_data.density, loaded_data.velocity_x,
+      loaded_data.velocity_y, loaded_data.velocity_z, gamma);
+  #endif  // MHD
+#endif    // DE
+
+#ifdef SCALAR
+  for (size_t i = 0; i < grid_enum::nscalars; i++) {
+    loaded_data.scalar[i] = dev_conserved[(grid_enum::scalar + i) * n_cells + id] / loaded_data.density;
+  }
+#endif  // SCALAR
+
+  return loaded_data;
+}
+// =====================================================================================================================
+
+// =====================================================================================================================
+/*!
+ * \brief Compute a simple slope. Equation is `coef * (right - left)`.
+ *
+ * \param[in] left The data with the lower index (on the "left" side)
+ * \param[in] right The data with the higher index (on the "right" side)
+ * \param[in] coef The coefficient to multiply the slope by. Defaults to 1.0
+ * \return Primitive The slopes
+ */
+Primitive __device__ __host__ __inline__ Compute_Slope(Primitive const &left, Primitive const &right,
+                                                       Real const &coef = 1.0)
+{
+  Primitive slopes;
+
+  slopes.density    = coef * (right.density - left.density);
+  slopes.velocity_x = coef * (right.velocity_x - left.velocity_x);
+  slopes.velocity_y = coef * (right.velocity_y - left.velocity_y);
+  slopes.velocity_z = coef * (right.velocity_z - left.velocity_z);
+  slopes.pressure   = coef * (right.pressure - left.pressure);
+
+#ifdef MHD
+  slopes.magnetic_y = coef * (right.magnetic_y - left.magnetic_y);
+  slopes.magnetic_z = coef * (right.magnetic_z - left.magnetic_z);
+#endif  // MHD
+
+#ifdef DE
+  slopes.gas_energy = coef * (right.gas_energy - left.gas_energy);
+#endif  // DE
+
+#ifdef SCALAR
+  for (size_t i = 0; i < grid_enum::nscalars; i++) {
+    slopes.scalar[i] = coef * (right.scalar[i] - left.scalar[i]);
+  }
+#endif  // SCALAR
+
+  return slopes;
+}
+// =====================================================================================================================
+
+// =====================================================================================================================
+/*!
+ * \brief Compute the Van Lear slope from the left and right slopes
+ *
+ * \param[in] left_slope The left slope
+ * \param[in] right_slope The right slope
+ * \return Primitive The Van Leer slope
+ */
+Primitive __device__ __host__ __inline__ Van_Leer_Slope(Primitive const &left_slope, Primitive const &right_slope)
+{
+  Primitive vl_slopes;
+
+  auto Calc_Vl_Slope = [](Real const &left, Real const &right) -> Real {
+    if (left * right > 0.0) {
+      return 2.0 * left * right / (left + right);
+    } else {
+      return 0.0;
+    }
+  };
+
+  vl_slopes.density    = Calc_Vl_Slope(left_slope.density, right_slope.density);
+  vl_slopes.velocity_x = Calc_Vl_Slope(left_slope.velocity_x, right_slope.velocity_x);
+  vl_slopes.velocity_y = Calc_Vl_Slope(left_slope.velocity_y, right_slope.velocity_y);
+  vl_slopes.velocity_z = Calc_Vl_Slope(left_slope.velocity_z, right_slope.velocity_z);
+  vl_slopes.pressure   = Calc_Vl_Slope(left_slope.pressure, right_slope.pressure);
+
+#ifdef MHD
+  vl_slopes.magnetic_y = Calc_Vl_Slope(left_slope.magnetic_y, right_slope.magnetic_y);
+  vl_slopes.magnetic_z = Calc_Vl_Slope(left_slope.magnetic_z, right_slope.magnetic_z);
+#endif  // MHD
+
+#ifdef DE
+  vl_slopes.gas_energy = Calc_Vl_Slope(left_slope.gas_energy, right_slope.gas_energy);
+#endif  // DE
+
+#ifdef SCALAR
+  for (size_t i = 0; i < grid_enum::nscalars; i++) {
+    vl_slopes.scalar[i] = Calc_Vl_Slope(left_slope.scalar[i], right_slope.scalar[i]);
+  }
+#endif  // SCALAR
+
+  return vl_slopes;
+}
+// =====================================================================================================================
+
+// =====================================================================================================================
+/*!
+ * \brief Compute the eigenvectors in the given cell
+ *
+ * \param[in] primitive The primitive variables in a particular cell
+ * \param[in] sound_speed The sound speed
+ * \param[in] sound_speed_squared The sound speed squared
+ * \param[in] gamma The adiabatic index
+ * \return EigenVecs
+ */
+#ifdef MHD
+EigenVecs __device__ __inline__ Compute_Eigenvectors(Primitive const &primitive, Real const &sound_speed,
+                                                     Real const &sound_speed_squared, Real const &gamma)
+{
+  EigenVecs output;
+  // This is taken from Stone et al. 2008, appendix A. Equation numbers will be quoted as relevant
+
+  // Compute wave speeds and their squares
+  output.magnetosonic_speed_fast = mhd::utils::fastMagnetosonicSpeed(
+      primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma);
+  output.magnetosonic_speed_slow = mhd::utils::slowMagnetosonicSpeed(
+      primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma);
+
+  output.magnetosonic_speed_fast_squared = output.magnetosonic_speed_fast * output.magnetosonic_speed_fast;
+  output.magnetosonic_speed_slow_squared = output.magnetosonic_speed_slow * output.magnetosonic_speed_slow;
+
+  // Compute Alphas (equation A16)
+  if (Real const denom = (output.magnetosonic_speed_fast_squared - output.magnetosonic_speed_slow_squared),
+      numerator_2      = (output.magnetosonic_speed_fast_squared - sound_speed_squared);
+      denom <= 0.0 or numerator_2 <= 0.0) {
+    output.alpha_fast = 1.0;
+    output.alpha_slow = 0.0;
+  } else if (Real const numerator_1 = (sound_speed_squared - output.magnetosonic_speed_slow_squared);
+             numerator_1 <= 0.0) {
+    output.alpha_fast = 0.0;
+    output.alpha_slow = 1.0;
+  } else {
+    output.alpha_fast = sqrt(numerator_1 / denom);
+    output.alpha_slow = sqrt(numerator_2 / denom);
+  }
+
+  // Compute Betas (equation A17). Note that rhypot can return an inf if By and Bz are both zero, the isfinite check
+  // handles that case
+  Real const beta_denom = rhypot(primitive.magnetic_y, primitive.magnetic_z);
+  output.beta_y         = (isfinite(beta_denom)) ? primitive.magnetic_y * beta_denom : 1.0;
+  output.beta_z         = (isfinite(beta_denom)) ? primitive.magnetic_z * beta_denom : 0.0;
+
+  // Compute Q(s) (equation A14)
+  output.sign         = copysign(1.0, primitive.magnetic_x);
+  output.n_fs         = 0.5 / sound_speed_squared;  // equation A19
+  output.q_prime_fast = output.sign * output.n_fs * output.alpha_fast * output.magnetosonic_speed_fast;
+  output.q_prime_slow = output.sign * output.n_fs * output.alpha_slow * output.magnetosonic_speed_slow;
+  output.q_fast       = output.sign * output.alpha_fast * output.magnetosonic_speed_fast;
+  output.q_slow       = output.sign * output.alpha_slow * output.magnetosonic_speed_slow;
+
+  // Compute A(s) (equation A15)
+  output.a_fast       = output.alpha_fast * sound_speed * sqrt(primitive.density);
+  output.a_slow       = output.alpha_slow * sound_speed * sqrt(primitive.density);
+  output.a_prime_fast = 0.5 * output.alpha_fast / (sound_speed * sqrt(primitive.density));
+  output.a_prime_slow = 0.5 * output.alpha_slow / (sound_speed * sqrt(primitive.density));
+
+  return output;
+}
+#endif  // MHD
+// =====================================================================================================================
+
+// =====================================================================================================================
+/*!
+ * \brief Project from the primitive variables slopes to the characteristic variables slopes. Stone Eqn 37. Use the
+ * eigenvectors given in Stone 2008, Appendix A
+ *
+ * \param[in] primitive The primitive variables
+ * \param[in] primitive_slope The primitive variables slopes
+ * \param[in] EigenVecs The eigenvectors
+ * \param[in] sound_speed The speed of sound
+ * \param[in] sound_speed_squared The speed of sound squared
+ * \param[in] gamma The adiabatic index
+ * \return Characteristic
+ */
+Characteristic __device__ __inline__ Primitive_To_Characteristic(Primitive const &primitive,
+                                                                 Primitive const &primitive_slope,
+                                                                 EigenVecs const &eigen, Real const &sound_speed,
+                                                                 Real const &sound_speed_squared, Real const &gamma)
+{
+  Characteristic output;
+
+#ifdef MHD
+  // Multiply the slopes by the left eigenvector matrix given in equation 18
+  Real const inverse_sqrt_density = rsqrt(primitive.density);
+  output.a0 =
+      eigen.n_fs * eigen.alpha_fast *
+          (primitive_slope.pressure / primitive.density - eigen.magnetosonic_speed_fast * primitive_slope.velocity_x) +
+      eigen.q_prime_slow * (eigen.beta_y * primitive_slope.velocity_y + eigen.beta_z * primitive_slope.velocity_z) +
+      eigen.a_prime_slow * (eigen.beta_y * primitive_slope.magnetic_y + eigen.beta_z * primitive_slope.magnetic_z);
+
+  output.a1 =
+      0.5 *
+      (eigen.beta_y * (primitive_slope.magnetic_z * eigen.sign * inverse_sqrt_density + primitive_slope.velocity_z) -
+       eigen.beta_z * (primitive_slope.magnetic_y * eigen.sign * inverse_sqrt_density + primitive_slope.velocity_y));
+
+  output.a2 =
+      eigen.n_fs * eigen.alpha_slow *
+          (primitive_slope.pressure / primitive.density - eigen.magnetosonic_speed_slow * primitive_slope.velocity_x) -
+      eigen.q_prime_fast * (eigen.beta_y * primitive_slope.velocity_y + eigen.beta_z * primitive_slope.velocity_z) -
+      eigen.a_prime_fast * (eigen.beta_y * primitive_slope.magnetic_y + eigen.beta_z * primitive_slope.magnetic_z);
+
+  output.a3 = primitive_slope.density - primitive_slope.pressure / sound_speed_squared;
+
+  output.a4 =
+      eigen.n_fs * eigen.alpha_slow *
+          (primitive_slope.pressure / primitive.density + eigen.magnetosonic_speed_slow * primitive_slope.velocity_x) +
+      eigen.q_prime_fast * (eigen.beta_y * primitive_slope.velocity_y + eigen.beta_z * primitive_slope.velocity_z) -
+      eigen.a_prime_fast * (eigen.beta_y * primitive_slope.magnetic_y + eigen.beta_z * primitive_slope.magnetic_z);
+  output.a5 =
+      0.5 *
+      (eigen.beta_y * (primitive_slope.magnetic_z * eigen.sign * inverse_sqrt_density - primitive_slope.velocity_z) -
+       eigen.beta_z * (primitive_slope.magnetic_y * eigen.sign * inverse_sqrt_density - primitive_slope.velocity_y));
+
+  output.a6 =
+      eigen.n_fs * eigen.alpha_fast *
+          (primitive_slope.pressure / primitive.density + eigen.magnetosonic_speed_fast * primitive_slope.velocity_x) -
+      eigen.q_prime_slow * (eigen.beta_y * primitive_slope.velocity_y + eigen.beta_z * primitive_slope.velocity_z) +
+      eigen.a_prime_slow * (eigen.beta_y * primitive_slope.magnetic_y + eigen.beta_z * primitive_slope.magnetic_z);
+
+#else   // not MHD
+  output.a0 = -primitive.density * primitive_slope.velocity_x / (2.0 * sound_speed) +
+              primitive_slope.pressure / (2.0 * sound_speed_squared);
+  output.a1 = primitive_slope.density - primitive_slope.pressure / (sound_speed_squared);
+  output.a2 = primitive_slope.velocity_y;
+  output.a3 = primitive_slope.velocity_z;
+  output.a4 = primitive.density * primitive_slope.velocity_x / (2.0 * sound_speed) +
+              primitive_slope.pressure / (2.0 * sound_speed_squared);
+#endif  // MHD
+
+  return output;
+}
+// =====================================================================================================================
+
+// =====================================================================================================================
+/*!
+ * \brief Project from the characteristic variables slopes to the primitive variables slopes. Stone Eqn 39. Use the
+ * eigenvectors given in Stone 2008, Appendix A
+ *
+ * \param[in] primitive The primitive variables
+ * \param[in] characteristic_slope The characteristic slopes
+ * \param[in] eigen The eigenvectors
+ * \param[in] sound_speed The sound speed
+ * \param[in] sound_speed_squared The sound speed squared
+ * \param[in] gamma The adiabatic index
+ * \return Primitive The state in primitive variables
+ */
+Primitive __device__ __host__ __inline__ Characteristic_To_Primitive(Primitive const &primitive,
+                                                                     Characteristic const &characteristic_slope,
+                                                                     EigenVecs const &eigen, Real const &sound_speed,
+                                                                     Real const &sound_speed_squared, Real const &gamma)
+{
+  Primitive output;
+#ifdef MHD
+  // Multiply the slopes by the right eigenvector matrix given in equation 12
+  output.density = primitive.density * (eigen.alpha_fast * (characteristic_slope.a0 + characteristic_slope.a6) +
+                                        eigen.alpha_slow * (characteristic_slope.a2 + characteristic_slope.a4)) +
+                   characteristic_slope.a3;
+  output.velocity_x =
+      eigen.magnetosonic_speed_fast * eigen.alpha_fast * (characteristic_slope.a6 - characteristic_slope.a0) +
+      eigen.magnetosonic_speed_slow * eigen.alpha_slow * (characteristic_slope.a4 - characteristic_slope.a2);
+  output.velocity_y = eigen.beta_y * (eigen.q_slow * (characteristic_slope.a0 - characteristic_slope.a6) +
+                                      eigen.q_fast * (characteristic_slope.a4 - characteristic_slope.a2)) +
+                      eigen.beta_z * (characteristic_slope.a5 - characteristic_slope.a1);
+  output.velocity_z = eigen.beta_z * (eigen.q_slow * (characteristic_slope.a0 - characteristic_slope.a6) +
+                                      eigen.q_fast * (characteristic_slope.a4 - characteristic_slope.a2)) +
+                      eigen.beta_y * (characteristic_slope.a1 - characteristic_slope.a5);
+  output.pressure = primitive.density * sound_speed_squared *
+                    (eigen.alpha_fast * (characteristic_slope.a0 + characteristic_slope.a6) +
+                     eigen.alpha_slow * (characteristic_slope.a2 + characteristic_slope.a4));
+  output.magnetic_y =
+      eigen.beta_y * (eigen.a_slow * (characteristic_slope.a0 + characteristic_slope.a6) -
+                      eigen.a_fast * (characteristic_slope.a2 + characteristic_slope.a4)) -
+      eigen.beta_z * eigen.sign * sqrt(primitive.density) * (characteristic_slope.a5 + characteristic_slope.a1);
+  output.magnetic_z =
+      eigen.beta_z * (eigen.a_slow * (characteristic_slope.a0 + characteristic_slope.a6) -
+                      eigen.a_fast * (characteristic_slope.a2 + characteristic_slope.a4)) +
+      eigen.beta_y * eigen.sign * sqrt(primitive.density) * (characteristic_slope.a5 + characteristic_slope.a1);
+
+#else   // not MHD
+  output.density    = characteristic_slope.a0 + characteristic_slope.a1 + characteristic_slope.a4;
+  output.velocity_x = sound_speed / primitive.density * (characteristic_slope.a4 - characteristic_slope.a0);
+  output.velocity_y = characteristic_slope.a2;
+  output.velocity_z = characteristic_slope.a3;
+  output.pressure   = sound_speed_squared * (characteristic_slope.a0 + characteristic_slope.a4);
+#endif  // MHD
+
+  return output;
+}
+// =====================================================================================================================
+
+// =====================================================================================================================
+/*!
+ * \brief Monotonize the characteristic slopes and project back into the primitive slopes
+ *
+ * \param[in] primitive The primitive variables
+ * \param[in] del_L The left primitive slopes
+ * \param[in] del_R The right primitive slopes
+ * \param[in] del_C The centered primitive slopes
+ * \param[in] del_G The Van Leer primitive slopes
+ * \param[in] del_a_L The left characteristic slopes
+ * \param[in] del_a_R The right characteristic slopes
+ * \param[in] del_a_C The centered characteristic slopes
+ * \param[in] del_a_G The Van Leer characteristic slopes
+ * \param[in] sound_speed The sound speed
+ * \param[in] sound_speed_squared The sound speed squared
+ * \param[in] gamma The adiabatic index
+ * \return Primitive The Monotonized primitive slopes
+ */
+Primitive __device__ __inline__ Monotonize_Characteristic_Return_Primitive(
+    Primitive const &primitive, Primitive const &del_L, Primitive const &del_R, Primitive const &del_C,
+    Primitive const &del_G, Characteristic const &del_a_L, Characteristic const &del_a_R, Characteristic const &del_a_C,
+    Characteristic const &del_a_G, EigenVecs const &eigenvectors, Real const &sound_speed,
+    Real const &sound_speed_squared, Real const &gamma)
+{
+  // The function that will actually do the monotozation
+  auto Monotonize = [](Real const &left, Real const &right, Real const &centered, Real const &van_leer) -> Real {
+    if (left * right > 0.0) {
+      Real const lim_slope_a = 2.0 * fmin(fabs(left), fabs(right));
+      Real const lim_slope_b = fmin(fabs(centered), fabs(van_leer));
+      return copysign(fmin(lim_slope_a, lim_slope_b), centered);
+    } else {
+      return 0.0;
+    }
+  };
+
+  // the monotonized difference in the characteristic variables
+  Characteristic del_a_m;
+
+  // Monotonize the slopes
+  del_a_m.a0 = Monotonize(del_a_L.a0, del_a_R.a0, del_a_C.a0, del_a_G.a0);
+  del_a_m.a1 = Monotonize(del_a_L.a1, del_a_R.a1, del_a_C.a1, del_a_G.a1);
+  del_a_m.a2 = Monotonize(del_a_L.a2, del_a_R.a2, del_a_C.a2, del_a_G.a2);
+  del_a_m.a3 = Monotonize(del_a_L.a3, del_a_R.a3, del_a_C.a3, del_a_G.a3);
+  del_a_m.a4 = Monotonize(del_a_L.a4, del_a_R.a4, del_a_C.a4, del_a_G.a4);
+
+#ifdef MHD
+  del_a_m.a5 = Monotonize(del_a_L.a5, del_a_R.a5, del_a_C.a5, del_a_G.a5);
+  del_a_m.a6 = Monotonize(del_a_L.a6, del_a_R.a6, del_a_C.a6, del_a_G.a6);
+#endif  // MHD
+
+  // Project into the primitive variables. Note the return by reference to preserve the values in the gas_energy and
+  // scalars
+  Primitive output =
+      Characteristic_To_Primitive(primitive, del_a_m, eigenvectors, sound_speed, sound_speed_squared, gamma);
+
+#ifdef DE
+  output.gas_energy = Monotonize(del_L.gas_energy, del_R.gas_energy, del_C.gas_energy, del_G.gas_energy);
+#endif  // DE
+#ifdef SCALAR
+  for (int i = 0; i < NSCALARS; i++) {
+    output.scalar[i] = Monotonize(del_L.scalar[i], del_R.scalar[i], del_C.scalar[i], del_G.scalar[i]);
+  }
+#endif  // SCALAR
+
+  return output;
+}
+// =====================================================================================================================
+
+// =====================================================================================================================
+/*!
+ * \brief Monotonize the parabolic interface states
+ *
+ * \param[in] cell_i The state in cell i
+ * \param[in] cell_im1 The state in cell i-1
+ * \param[in] cell_ip1 The state in cell i+1
+ * \param[in,out] interface_L_iph The left interface state at i+1/2
+ * \param[in,out] interface_R_imh The right interface state at i-1/2
+ * \return Primitive
+ */
+void __device__ __host__ __inline__ Monotonize_Parabolic_Interface(Primitive const &cell_i, Primitive const &cell_im1,
+                                                                   Primitive const &cell_ip1,
+                                                                   Primitive &interface_L_iph,
+                                                                   Primitive &interface_R_imh)
+{
+  // The function that will actually do the monotozation. Note the return by refernce of the interface state
+  auto Monotonize = [](Real const &state_i, Real const &state_im1, Real const &state_ip1, Real &interface_L,
+                       Real &interface_R) {
+    // Some terms we need for the comparisons
+    Real const term_1 = 6.0 * (interface_L - interface_R) * (state_i - 0.5 * (interface_R + interface_L));
+    Real const term_2 = pow(interface_L - interface_R, 2.0);
+
+    // First monotonicity constraint. Equations 47-49 in Stone et al. 2008
+    if ((interface_L - state_i) * (state_i - interface_R) <= 0.0) {
+      interface_L = state_i;
+      interface_R = state_i;
+    }
+    // Second monotonicity constraint. Equations 50 & 51 in Stone et al. 2008
+    else if (term_1 > term_2) {
+      interface_R = 3.0 * state_i - 2.0 * interface_L;
+    }
+    // Third monotonicity constraint. Equations 52 & 53 in Stone et al. 2008
+    else if (term_1 < -term_2) {
+      interface_L = 3.0 * state_i - 2.0 * interface_R;
+    }
+
+    // Bound the interface to lie between adjacent cell centered values
+    interface_R = fmax(fmin(state_i, state_im1), interface_R);
+    interface_R = fmin(fmax(state_i, state_im1), interface_R);
+    interface_L = fmax(fmin(state_i, state_ip1), interface_L);
+    interface_L = fmin(fmax(state_i, state_ip1), interface_L);
+  };
+
+  // Monotonize each interface state
+  Monotonize(cell_i.density, cell_im1.density, cell_ip1.density, interface_L_iph.density, interface_R_imh.density);
+  Monotonize(cell_i.velocity_x, cell_im1.velocity_x, cell_ip1.velocity_x, interface_L_iph.velocity_x,
+             interface_R_imh.velocity_x);
+  Monotonize(cell_i.velocity_y, cell_im1.velocity_y, cell_ip1.velocity_y, interface_L_iph.velocity_y,
+             interface_R_imh.velocity_y);
+  Monotonize(cell_i.velocity_z, cell_im1.velocity_z, cell_ip1.velocity_z, interface_L_iph.velocity_z,
+             interface_R_imh.velocity_z);
+  Monotonize(cell_i.pressure, cell_im1.pressure, cell_ip1.pressure, interface_L_iph.pressure, interface_R_imh.pressure);
+
+#ifdef MHD
+  Monotonize(cell_i.magnetic_y, cell_im1.magnetic_y, cell_ip1.magnetic_y, interface_L_iph.magnetic_y,
+             interface_R_imh.magnetic_y);
+  Monotonize(cell_i.magnetic_z, cell_im1.magnetic_z, cell_ip1.magnetic_z, interface_L_iph.magnetic_z,
+             interface_R_imh.magnetic_z);
+#endif  // MHD
+
+#ifdef DE
+  Monotonize(cell_i.gas_energy, cell_im1.gas_energy, cell_ip1.gas_energy, interface_L_iph.gas_energy,
+             interface_R_imh.gas_energy);
+#endif  // DE
+#ifdef SCALAR
+  for (int i = 0; i < NSCALARS; i++) {
+    Monotonize(cell_i.scalar[i], cell_im1.scalar[i], cell_ip1.scalar[i], interface_L_iph.scalar[i],
+               interface_R_imh.scalar[i]);
+  }
+#endif  // SCALAR
+}
+// =====================================================================================================================
+
+// =====================================================================================================================
+/*!
+ * \brief Compute the interface state from the slope and cell centered state using linear interpolation
+ *
+ * \param[in] primitive The cell centered state
+ * \param[in] slopes The slopes
+ * \param[in] sign Whether to add or subtract the slope. +1 to add it and -1 to subtract it
+ * \return Primitive The interface state
+ */
+Primitive __device__ __host__ __inline__ Calc_Interface_Linear(Primitive const &primitive, Primitive const &slopes,
+                                                               Real const &sign)
+{
+  Primitive output;
+
+  auto interface = [&sign](Real const &state, Real const &slope) -> Real { return state + sign * 0.5 * slope; };
+
+  output.density    = interface(primitive.density, slopes.density);
+  output.velocity_x = interface(primitive.velocity_x, slopes.velocity_x);
+  output.velocity_y = interface(primitive.velocity_y, slopes.velocity_y);
+  output.velocity_z = interface(primitive.velocity_z, slopes.velocity_z);
+  output.pressure   = interface(primitive.pressure, slopes.pressure);
+
+#ifdef MHD
+  output.magnetic_y = interface(primitive.magnetic_y, slopes.magnetic_y);
+  output.magnetic_z = interface(primitive.magnetic_z, slopes.magnetic_z);
+#endif  // MHD
+
+#ifdef DE
+  output.gas_energy = interface(primitive.gas_energy, slopes.gas_energy);
+#endif  // DE
+#ifdef SCALAR
+  for (int i = 0; i < NSCALARS; i++) {
+    output.scalar[i] = interface(primitive.scalar[i], slopes.scalar[i]);
+  }
+#endif  // SCALAR
+
+  return output;
+}
+// =====================================================================================================================
+
+// =====================================================================================================================
+/*!
+ * \brief Apply limiting the the primitive interfaces in PLM reconstructions
+ *
+ * \param[in,out] interface_L_iph The unlimited left plus 1/2 interface
+ * \param[in,out] interface_R_imh The unlimited right minus 1/2 interface
+ * \param[in] cell_imo The cell centered values at i-1
+ * \param[in] cell_i The cell centered values at i
+ * \param[in] cell_ipo The cell centered values at i+1
+ */
+void __device__ __host__ __inline__ Plm_Limit_Interfaces(Primitive &interface_L_iph, Primitive &interface_R_imh,
+                                                         Primitive const &cell_imo, Primitive const &cell_i,
+                                                         Primitive const &cell_ipo)
+{
+  auto limiter = [](Real &l_iph, Real &r_imh, Real const &val_imo, Real const &val_i, Real const &val_ipo) {
+    r_imh = fmax(fmin(val_i, val_imo), r_imh);
+    r_imh = fmin(fmax(val_i, val_imo), r_imh);
+    l_iph = fmax(fmin(val_i, val_ipo), l_iph);
+    l_iph = fmin(fmax(val_i, val_ipo), l_iph);
+  };
+
+  limiter(interface_L_iph.density, interface_R_imh.density, cell_imo.density, cell_i.density, cell_ipo.density);
+  limiter(interface_L_iph.velocity_x, interface_R_imh.velocity_x, cell_imo.velocity_x, cell_i.velocity_x,
+          cell_ipo.velocity_x);
+  limiter(interface_L_iph.velocity_y, interface_R_imh.velocity_y, cell_imo.velocity_y, cell_i.velocity_y,
+          cell_ipo.velocity_y);
+  limiter(interface_L_iph.velocity_z, interface_R_imh.velocity_z, cell_imo.velocity_z, cell_i.velocity_z,
+          cell_ipo.velocity_z);
+  limiter(interface_L_iph.pressure, interface_R_imh.pressure, cell_imo.pressure, cell_i.pressure, cell_ipo.pressure);
+
+#ifdef MHD
+  limiter(interface_L_iph.magnetic_y, interface_R_imh.magnetic_y, cell_imo.magnetic_y, cell_i.magnetic_y,
+          cell_ipo.magnetic_y);
+  limiter(interface_L_iph.magnetic_z, interface_R_imh.magnetic_z, cell_imo.magnetic_z, cell_i.magnetic_z,
+          cell_ipo.magnetic_z);
+#endif  // MHD
+
+#ifdef DE
+  limiter(interface_L_iph.gas_energy, interface_R_imh.gas_energy, cell_imo.gas_energy, cell_i.gas_energy,
+          cell_ipo.gas_energy);
+#endif  // DE
+#ifdef SCALAR
+  for (int i = 0; i < NSCALARS; i++) {
+    limiter(interface_L_iph.scalar[i], interface_R_imh.scalar[i], cell_imo.scalar[i], cell_i.scalar[i],
+            cell_ipo.scalar[i]);
+  }
+#endif  // SCALAR
+}
+// =====================================================================================================================
+
+// =====================================================================================================================
+/*!
+ * \brief Compute the interface state for the CTU version fo the reconstructor from the slope and cell centered state
+ * using parabolic interpolation
+ *
+ * \param[in] cell_i The state in cell i
+ * \param[in] cell_im1 The state in cell i-1
+ * \param[in] slopes_i The slopes in cell i
+ * \param[in] slopes_im1 The slopes in cell i-1
+ * \return Primitive The interface state
+ */
+Primitive __device__ __host__ __inline__ Calc_Interface_Parabolic(Primitive const &cell_i, Primitive const &cell_im1,
+                                                                  Primitive const &slopes_i,
+                                                                  Primitive const &slopes_im1)
+{
+  Primitive output;
+
+  auto interface = [](Real const &state_i, Real const &state_im1, Real const &slope_i, Real const &slope_im1) -> Real {
+    return 0.5 * (state_i + state_im1) - (slope_i - slope_im1) / 6.0;
+  };
+
+  output.density    = interface(cell_i.density, cell_im1.density, slopes_i.density, slopes_im1.density);
+  output.velocity_x = interface(cell_i.velocity_x, cell_im1.velocity_x, slopes_i.velocity_x, slopes_im1.velocity_x);
+  output.velocity_y = interface(cell_i.velocity_y, cell_im1.velocity_y, slopes_i.velocity_y, slopes_im1.velocity_y);
+  output.velocity_z = interface(cell_i.velocity_z, cell_im1.velocity_z, slopes_i.velocity_z, slopes_im1.velocity_z);
+  output.pressure   = interface(cell_i.pressure, cell_im1.pressure, slopes_i.pressure, slopes_im1.pressure);
+
+#ifdef MHD
+  output.magnetic_y = interface(cell_i.magnetic_y, cell_im1.magnetic_y, slopes_i.magnetic_y, slopes_im1.magnetic_y);
+  output.magnetic_z = interface(cell_i.magnetic_z, cell_im1.magnetic_z, slopes_i.magnetic_z, slopes_im1.magnetic_z);
+#endif  // MHD
+
+#ifdef DE
+  output.gas_energy = interface(cell_i.gas_energy, cell_im1.gas_energy, slopes_i.gas_energy, slopes_im1.gas_energy);
+#endif  // DE
+#ifdef SCALAR
+  for (int i = 0; i < NSCALARS; i++) {
+    output.scalar[i] = interface(cell_i.scalar[i], cell_im1.scalar[i], slopes_i.scalar[i], slopes_im1.scalar[i]);
+  }
+#endif  // SCALAR
+
+  return output;
+}
+// =====================================================================================================================
+
+// =====================================================================================================================
+/*!
+ * \brief Compute the PPM interface state for a given field/stencil.
+ *
+ * \details This method is heavily based on the implementation in Athena++. See the following papers for details
+ * - K. Felker & J. Stone, "A fourth-order accurate finite volume method for ideal MHD via upwind constrained
+ * transport", JCP, 375, (2018)
+ * - P. Colella & P. Woodward, "The Piecewise Parabolic Method (PPM) for Gas-Dynamical Simulations", JCP, 54, 174
+ * (1984)
+ * - P. Colella & M. Sekora, "A limiter for PPM that preserves accuracy at smooth extrema", JCP, 227, 7069 (2008)
+ * - P. McCorquodale & P. Colella,  "A high-order finite-volume method for conservation laws on locally refined
+ * grids", CAMCoS, 6, 1 (2011)
+ * - P. Colella, M.R. Dorr, J. Hittinger, D. Martin, "High-order, finite-volume methods in mapped coordinates", JCP,
+ * 230, 2952 (2011)
+ *
+ * \param[in] cell_im2 The value of the field/stencil at i-2
+ * \param[in] cell_im1 The value of the field/stencil at i-1
+ * \param[in] cell_i The value of the field/stencil at i
+ * \param[in] cell_ip1 The value of the field/stencil at i+1
+ * \param[in] cell_ip2 The value of the field/stencil at i+2
+ * \param[out] interface_L_iph The left interface at the i+1/2 face
+ * \param[out] interface_R_imh The right interface at the i-1/2 face
+ */
+void __device__ __host__ __inline__ PPM_Single_Variable(Real const &cell_im2, Real const &cell_im1, Real const &cell_i,
+                                                        Real const &cell_ip1, Real const &cell_ip2,
+                                                        Real &interface_L_iph, Real &interface_R_imh)
+{
+  // Let's start by setting up some things that we'll need later
+
+  // Colella & Sekora 2008 constant used in second derivative limiter
+  Real const C2 = 1.25;
+
+  // This lambda function is used for limiting the interfaces
+  auto limit_interface = [&C2](Real const &cell_i, Real const &cell_im1, Real const &interface, Real const &slope_2nd_i,
+                               Real const &slope_2nd_im1) -> Real {
+    // Colella et al. 2011 eq. 85b.
+    // 85a is slope_2nd_im1 and 85c is slope_2nd_i
+    Real slope_2nd_centered = 3.0 * (cell_im1 + cell_i - 2.0 * interface);
+
+    Real limited_slope = 0.0;
+    if (SIGN(slope_2nd_centered) == SIGN(slope_2nd_im1) and SIGN(slope_2nd_centered) == SIGN(slope_2nd_i)) {
+      limited_slope = SIGN(slope_2nd_centered) *
+                      fmin(C2 * abs(slope_2nd_im1), fmin(C2 * abs(slope_2nd_i), abs(slope_2nd_centered)));
+    }
+
+    // Collela et al. 2011 eq. 84a & 84b
+    Real const diff_left  = interface - cell_im1;
+    Real const diff_right = cell_i - interface;
+    if (diff_left * diff_right < 0.0) {
+      // Local extrema detected at the interface
+      return 0.5 * (cell_im1 + cell_i) - limited_slope / 6.0;
+    } else {
+      return interface;
+    }
+  };
+
+  // Now that the setup is done we can start computing the interface states
+
+  // Compute average slopes
+  Real const slope_left    = (cell_i - cell_im1);
+  Real const slope_right   = (cell_ip1 - cell_i);
+  Real const slope_avg_im1 = 0.5 * slope_left + 0.5 * (cell_im1 - cell_im2);
+  Real const slope_avg_i   = 0.5 * slope_right + 0.5 * slope_left;
+  Real const slope_avg_ip1 = 0.5 * (cell_ip2 - cell_ip1) + 0.5 * slope_right;
+
+  // Approximate interface average at i-1/2 and i+1/2 using PPM
+  // P. Colella & P. Woodward 1984 eq. 1.6
+  interface_R_imh = 0.5 * (cell_im1 + cell_i) + (slope_avg_im1 - slope_avg_i) / 6.0;
+  interface_L_iph = 0.5 * (cell_i + cell_ip1) + (slope_avg_i - slope_avg_ip1) / 6.0;
+
+  // Limit interpolated interface states (Colella et al. 2011 section 4.3.1)
+
+  // Approximate second derivative at interfaces for smooth extrema preservation
+  // Colella et al. 2011 eq 85a
+  Real const slope_2nd_im1 = cell_im2 + cell_i - 2.0 * cell_im1;
+  Real const slope_2nd_i   = cell_im1 + cell_ip1 - 2.0 * cell_i;
+  Real const slope_2nd_ip1 = cell_i + cell_ip2 - 2.0 * cell_ip1;
+
+  interface_R_imh = limit_interface(cell_i, cell_im1, interface_R_imh, slope_2nd_i, slope_2nd_im1);
+  interface_L_iph = limit_interface(cell_ip1, cell_i, interface_L_iph, slope_2nd_ip1, slope_2nd_i);
+
+  // Compute cell-centered difference stencils (McCorquodale & Colella 2011 section 2.4.1)
+
+  // Apply Colella & Sekora limiters to parabolic interpolant
+  Real slope_2nd_face = 6.0 * (interface_R_imh + interface_L_iph - 2.0 * cell_i);
+
+  Real slope_2nd_limited = 0.0;
+  if (SIGN(slope_2nd_im1) == SIGN(slope_2nd_i) and SIGN(slope_2nd_im1) == SIGN(slope_2nd_ip1) and
+      SIGN(slope_2nd_im1) == SIGN(slope_2nd_face)) {
+    // Extrema is smooth
+    // Colella & Sekora eq. 22
+    slope_2nd_limited = SIGN(slope_2nd_face) * fmin(fmin(C2 * abs(slope_2nd_im1), C2 * abs(slope_2nd_i)),
+                                                    fmin(C2 * abs(slope_2nd_ip1), abs(slope_2nd_face)));
+  }
+
+  // Check if 2nd derivative is close to roundoff error
+  Real cell_max = fmax(abs(cell_im2), abs(cell_im1));
+  cell_max      = fmax(cell_max, abs(cell_i));
+  cell_max      = fmax(cell_max, abs(cell_ip1));
+  cell_max      = fmax(cell_max, abs(cell_ip2));
+
+  // If this condition is true then the limiter is not sensitive to roundoff and we use the limited ratio
+  // McCorquodale & Colella 2011 eq. 27
+  Real const rho = (abs(slope_2nd_face) > (1.0e-12) * cell_max) ? slope_2nd_limited / slope_2nd_face : 0.0;
+
+  // Colella & Sekora eq. 25
+  Real slope_face_left  = cell_i - interface_R_imh;
+  Real slope_face_right = interface_L_iph - cell_i;
+
+  // Check for local extrema
+  if ((slope_face_left * slope_face_right) <= 0.0 or ((cell_ip1 - cell_i) * (cell_i - cell_im1)) <= 0.0) {
+    // Extrema detected
+    // Check if relative change in limited 2nd deriv is > roundoff
+    if (rho <= (1.0 - (1.0e-12))) {
+      // Limit smooth extrema
+      // Colella & Sekora eq. 23
+      interface_R_imh = cell_i - rho * slope_face_left;
+      interface_L_iph = cell_i + rho * slope_face_right;
+    }
+  } else {
+    // No extrema detected
+    // Overshoot i-1/2,R / i,(-) state
+    if (abs(slope_face_left) >= 2.0 * abs(slope_face_right)) {
+      interface_R_imh = cell_i - 2.0 * slope_face_right;
+    }
+    // Overshoot i+1/2,L / i,(+) state
+    if (abs(slope_face_right) >= 2.0 * abs(slope_face_left)) {
+      interface_L_iph = cell_i + 2.0 * slope_face_left;
+    }
+  }
+}
+// =====================================================================================================================
+
+// =====================================================================================================================
+/*!
+ * \brief Write the interface data to the appropriate arrays
+ *
+ * \param[in] interface_state The interface state to write
+ * \param[out] dev_interface The interface array
+ * \param[in] dev_conserved The conserved variables
+ * \param[in] id The cell id to write to
+ * \param[in] n_cells The total number of cells
+ * \param[in] o1 Directional parameter
+ * \param[in] o2 Directional parameter
+ * \param[in] o3 Directional parameter
+ * \param[in] gamma The adiabatic index
+ */
+void __device__ __host__ __inline__ Write_Data(Primitive const &interface_state, Real *dev_interface,
+                                               Real const *dev_conserved, size_t const &id, size_t const &n_cells,
+                                               size_t const &o1, size_t const &o2, size_t const &o3, Real const &gamma)
+{
+  // Write out density and momentum
+  dev_interface[grid_enum::density * n_cells + id] = interface_state.density;
+  dev_interface[o1 * n_cells + id]                 = interface_state.density * interface_state.velocity_x;
+  dev_interface[o2 * n_cells + id]                 = interface_state.density * interface_state.velocity_y;
+  dev_interface[o3 * n_cells + id]                 = interface_state.density * interface_state.velocity_z;
+
+#ifdef MHD
+  // Write the Y and Z interface states and load the X magnetic face needed to compute the energy
+  Real magnetic_x;
+  switch (o1) {
+    case grid_enum::momentum_x:
+      dev_interface[grid_enum::Q_x_magnetic_y * n_cells + id] = interface_state.magnetic_y;
+      dev_interface[grid_enum::Q_x_magnetic_z * n_cells + id] = interface_state.magnetic_z;
+      magnetic_x                                              = dev_conserved[grid_enum::magnetic_x * n_cells + id];
+      break;
+    case grid_enum::momentum_y:
+      dev_interface[grid_enum::Q_y_magnetic_z * n_cells + id] = interface_state.magnetic_y;
+      dev_interface[grid_enum::Q_y_magnetic_x * n_cells + id] = interface_state.magnetic_z;
+      magnetic_x                                              = dev_conserved[grid_enum::magnetic_y * n_cells + id];
+      break;
+    case grid_enum::momentum_z:
+      dev_interface[grid_enum::Q_z_magnetic_x * n_cells + id] = interface_state.magnetic_y;
+      dev_interface[grid_enum::Q_z_magnetic_y * n_cells + id] = interface_state.magnetic_z;
+      magnetic_x                                              = dev_conserved[grid_enum::magnetic_z * n_cells + id];
+      break;
+  }
+
+  // Compute the MHD energy
+  dev_interface[grid_enum::Energy * n_cells + id] = hydro_utilities::Calc_Energy_Primitive(
+      interface_state.pressure, interface_state.density, interface_state.velocity_x, interface_state.velocity_y,
+      interface_state.velocity_z, gamma, magnetic_x, interface_state.magnetic_y, interface_state.magnetic_z);
+#else   // not MHD
+  // Compute the hydro energy
+  dev_interface[grid_enum::Energy * n_cells + id] = hydro_utilities::Calc_Energy_Primitive(
+      interface_state.pressure, interface_state.density, interface_state.velocity_x, interface_state.velocity_y,
+      interface_state.velocity_z, gamma);
+#endif  // MHD
+
+#ifdef DE
+  dev_interface[grid_enum::GasEnergy * n_cells + id] = interface_state.density * interface_state.gas_energy;
+#endif  // DE
+#ifdef SCALAR
+  for (int i = 0; i < NSCALARS; i++) {
+    dev_interface[(grid_enum::scalar + i) * n_cells + id] = interface_state.density * interface_state.scalar[i];
+  }
+#endif  // SCALAR
+}
+// =====================================================================================================================
+}  // namespace reconstruction
diff --git a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu
new file mode 100644
index 000000000..dc1f10720
--- /dev/null
+++ b/src/reconstruction/reconstruction_tests.cu
@@ -0,0 +1,682 @@
+/*!
+ * \file reconstruction_tests.cu
+ * \brief Tests for the contents of reconstruction.h
+ *
+ */
+
+// STL Includes
+#include <algorithm>
+#include <string>
+#include <vector>
+
+// External Includes
+#include <gtest/gtest.h>  // Include GoogleTest and related libraries/headers
+
+// Local Includes
+#include "../global/global.h"
+#include "../global/global_cuda.h"
+#include "../io/io.h"
+#include "../reconstruction/reconstruction.h"
+#include "../utils/DeviceVector.h"
+#include "../utils/cuda_utilities.h"
+#include "../utils/gpu.hpp"
+#include "../utils/testing_utilities.h"
+
+#ifdef MHD
+__global__ void Test_Prim_2_Char(reconstruction::Primitive const primitive,
+                                 reconstruction::Primitive const primitive_slope,
+                                 reconstruction::EigenVecs const eigenvectors, Real const gamma, Real const sound_speed,
+                                 Real const sound_speed_squared, reconstruction::Characteristic *characteristic_slope)
+{
+  *characteristic_slope = reconstruction::Primitive_To_Characteristic(primitive, primitive_slope, eigenvectors,
+                                                                      sound_speed, sound_speed_squared, gamma);
+}
+
+__global__ void Test_Char_2_Prim(reconstruction::Primitive const primitive,
+                                 reconstruction::Characteristic const characteristic_slope,
+                                 reconstruction::EigenVecs const eigenvectors, Real const gamma, Real const sound_speed,
+                                 Real const sound_speed_squared, reconstruction::Primitive *primitive_slope)
+{
+  *primitive_slope = reconstruction::Characteristic_To_Primitive(primitive, characteristic_slope, eigenvectors,
+                                                                 sound_speed, sound_speed_squared, gamma);
+}
+
+__global__ void Test_Compute_Eigenvectors(reconstruction::Primitive const primitive, Real const sound_speed,
+                                          Real const sound_speed_squared, Real const gamma,
+                                          reconstruction::EigenVecs *eigenvectors)
+{
+  *eigenvectors = reconstruction::Compute_Eigenvectors(primitive, sound_speed, sound_speed_squared, gamma);
+}
+
+TEST(tMHDReconstructionPrimitive2Characteristic, CorrectInputExpectCorrectOutput)
+{
+  // Test parameters
+  Real const &gamma = 5. / 3.;
+  reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8};
+  reconstruction::Primitive const primitive_slope{9, 10, 11, 12, 13, 14, 15, 16};
+  reconstruction::EigenVecs const eigenvectors{
+      17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
+  };
+  Real const sound_speed         = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma);
+  Real const sound_speed_squared = sound_speed * sound_speed;
+
+  // Run test
+  cuda_utilities::DeviceVector<reconstruction::Characteristic> dev_results(1);
+  hipLaunchKernelGGL(Test_Prim_2_Char, 1, 1, 0, 0, primitive, primitive_slope, eigenvectors, gamma, sound_speed,
+                     sound_speed_squared, dev_results.data());
+  GPU_Error_Check();
+  cudaDeviceSynchronize();
+  reconstruction::Characteristic const host_results = dev_results.at(0);
+
+  // Check results
+  reconstruction::Characteristic const fiducial_results{-40327, 110, -132678, 7.4400000000000004, 98864, 98, 103549};
+  testing_utilities::Check_Results(fiducial_results.a0, host_results.a0, "a0");
+  testing_utilities::Check_Results(fiducial_results.a1, host_results.a1, "a1");
+  testing_utilities::Check_Results(fiducial_results.a2, host_results.a2, "a2");
+  testing_utilities::Check_Results(fiducial_results.a3, host_results.a3, "a3");
+  testing_utilities::Check_Results(fiducial_results.a4, host_results.a4, "a4");
+  testing_utilities::Check_Results(fiducial_results.a5, host_results.a5, "a5");
+  testing_utilities::Check_Results(fiducial_results.a6, host_results.a6, "a6");
+}
+
+TEST(tMHDReconstructionCharacteristic2Primitive, CorrectInputExpectCorrectOutput)
+{
+  // Test parameters
+  Real const &gamma = 5. / 3.;
+  reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8};
+  reconstruction::Characteristic const characteristic_slope{17, 18, 19, 20, 21, 22, 23};
+  reconstruction::EigenVecs const eigenvectors{
+      17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
+  };
+  Real const sound_speed         = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma);
+  Real const sound_speed_squared = sound_speed * sound_speed;
+
+  // Run test
+  cuda_utilities::DeviceVector<reconstruction::Primitive> dev_results(1);
+  hipLaunchKernelGGL(Test_Char_2_Prim, 1, 1, 0, 0, primitive, characteristic_slope, eigenvectors, gamma, sound_speed,
+                     sound_speed_squared, dev_results.data());
+  GPU_Error_Check();
+  cudaDeviceSynchronize();
+  reconstruction::Primitive const host_results = dev_results.at(0);
+
+  // Check results
+  reconstruction::Primitive const fiducial_results{1740, 2934, -2526, -2828, 14333.333333333338, 0.0, -24040, 24880};
+  testing_utilities::Check_Results(fiducial_results.density, host_results.density, "density");
+  testing_utilities::Check_Results(fiducial_results.velocity_x, host_results.velocity_x, "velocity_x");
+  testing_utilities::Check_Results(fiducial_results.velocity_y, host_results.velocity_y, "velocity_y", 1.34E-14);
+  testing_utilities::Check_Results(fiducial_results.velocity_z, host_results.velocity_z, "velocity_z", 1.6E-14);
+  testing_utilities::Check_Results(fiducial_results.pressure, host_results.pressure, "pressure");
+  testing_utilities::Check_Results(fiducial_results.magnetic_y, host_results.magnetic_y, "magnetic_y");
+  testing_utilities::Check_Results(fiducial_results.magnetic_z, host_results.magnetic_z, "magnetic_z");
+}
+
+TEST(tMHDReconstructionComputeEigenvectors, CorrectInputExpectCorrectOutput)
+{
+  // Test parameters
+  Real const &gamma = 5. / 3.;
+  reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8};
+  Real const sound_speed         = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma);
+  Real const sound_speed_squared = sound_speed * sound_speed;
+
+  // Run test
+  cuda_utilities::DeviceVector<reconstruction::EigenVecs> dev_results(1);
+  hipLaunchKernelGGL(Test_Compute_Eigenvectors, 1, 1, 0, 0, primitive, sound_speed, sound_speed_squared, gamma,
+                     dev_results.data());
+  GPU_Error_Check();
+  cudaDeviceSynchronize();
+  reconstruction::EigenVecs const host_results = dev_results.at(0);
+  // std::cout << to_string_exact(host_results.magnetosonic_speed_fast) << ",";
+  // std::cout << to_string_exact(host_results.magnetosonic_speed_slow) << ",";
+  // std::cout << to_string_exact(host_results.magnetosonic_speed_fast_squared) << ",";
+  // std::cout << to_string_exact(host_results.magnetosonic_speed_slow_squared) << ",";
+  // std::cout << to_string_exact(host_results.alpha_fast) << ",";
+  // std::cout << to_string_exact(host_results.alpha_slow) << ",";
+  // std::cout << to_string_exact(host_results.beta_y) << ",";
+  // std::cout << to_string_exact(host_results.beta_z) << ",";
+  // std::cout << to_string_exact(host_results.n_fs) << ",";
+  // std::cout << to_string_exact(host_results.sign) << ",";
+  // std::cout << to_string_exact(host_results.q_fast) << ",";
+  // std::cout << to_string_exact(host_results.q_slow) << ",";
+  // std::cout << to_string_exact(host_results.a_fast) << ",";
+  // std::cout << to_string_exact(host_results.a_slow) << ",";
+  // std::cout << to_string_exact(host_results.q_prime_fast) << ",";
+  // std::cout << to_string_exact(host_results.q_prime_slow) << ",";
+  // std::cout << to_string_exact(host_results.a_prime_fast) << ",";
+  // std::cout << to_string_exact(host_results.a_prime_slow) << "," << std::endl;
+  // Check results
+  reconstruction::EigenVecs const fiducial_results{
+      12.466068627219666,   1.3894122191714398,  155.40286701855041,  1.9304663147829049,   0.20425471836256681,
+      0.97891777490585408,  0.65850460786851805, 0.75257669470687782, 0.059999999999999984, 1,
+      2.546253336541183,    1.3601203180183106,  0.58963258314939582, 2.825892204282022,    0.15277520019247093,
+      0.081607219081098623, 0.03537795498896374, 0.1695535322569213};
+  testing_utilities::Check_Results(fiducial_results.magnetosonic_speed_fast, host_results.magnetosonic_speed_fast,
+                                   "magnetosonic_speed_fast");
+  testing_utilities::Check_Results(fiducial_results.magnetosonic_speed_slow, host_results.magnetosonic_speed_slow,
+                                   "magnetosonic_speed_slow");
+  testing_utilities::Check_Results(fiducial_results.magnetosonic_speed_fast_squared,
+                                   host_results.magnetosonic_speed_fast_squared, "magnetosonic_speed_fast_squared");
+  testing_utilities::Check_Results(fiducial_results.magnetosonic_speed_slow_squared,
+                                   host_results.magnetosonic_speed_slow_squared, "magnetosonic_speed_slow_squared");
+  testing_utilities::Check_Results(fiducial_results.alpha_fast, host_results.alpha_fast, "alpha_fast");
+  testing_utilities::Check_Results(fiducial_results.alpha_slow, host_results.alpha_slow, "alpha_slow");
+  testing_utilities::Check_Results(fiducial_results.beta_y, host_results.beta_y, "beta_y");
+  testing_utilities::Check_Results(fiducial_results.beta_z, host_results.beta_z, "beta_z");
+  testing_utilities::Check_Results(fiducial_results.n_fs, host_results.n_fs, "n_fs");
+  testing_utilities::Check_Results(fiducial_results.sign, host_results.sign, "sign");
+  testing_utilities::Check_Results(fiducial_results.q_fast, host_results.q_fast, "q_fast");
+  testing_utilities::Check_Results(fiducial_results.q_slow, host_results.q_slow, "q_slow");
+  testing_utilities::Check_Results(fiducial_results.a_fast, host_results.a_fast, "a_fast");
+  testing_utilities::Check_Results(fiducial_results.a_slow, host_results.a_slow, "a_slow");
+  testing_utilities::Check_Results(fiducial_results.q_prime_fast, host_results.q_prime_fast, "q_prime_fast");
+  testing_utilities::Check_Results(fiducial_results.q_prime_slow, host_results.q_prime_slow, "q_prime_slow");
+  testing_utilities::Check_Results(fiducial_results.a_prime_fast, host_results.a_prime_fast, "a_prime_fast");
+  testing_utilities::Check_Results(fiducial_results.a_prime_slow, host_results.a_prime_slow, "a_prime_slow");
+}
+#endif  // MHD
+
+TEST(tALLReconstructionThreadGuard, CorrectInputExpectCorrectOutput)
+{
+  // Test parameters
+  int const order = 3;
+  int const nx    = 6;
+  int const ny    = 6;
+  int const nz    = 6;
+
+  // fiducial data
+  std::vector<int> fiducial_vals(nx * ny * nz, 1);
+  fiducial_vals.at(86) = 0;
+
+  // loop through all values of the indices and check them
+  for (int xid = 0; xid < nx; xid++) {
+    for (int yid = 0; yid < ny; yid++) {
+      for (int zid = 0; zid < nz; zid++) {
+        // Get the test value
+        bool test_val = reconstruction::Thread_Guard<order>(nx, ny, nz, xid, yid, zid);
+
+        // Compare
+        int id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny);
+        ASSERT_EQ(test_val, fiducial_vals.at(id))
+            << "Test value not equal to fiducial value at id = " << id << std::endl;
+      }
+    }
+  }
+}
+
+TEST(tALLReconstructionLoadData, CorrectInputExpectCorrectOutput)
+{
+  // Set up test and mock up grid
+  size_t const nx = 3, ny = 3, nz = 3;
+  size_t const n_cells = nx * ny * nz;
+  size_t const xid = 1, yid = 1, zid = 1;
+  size_t const o1 = grid_enum::momentum_x, o2 = grid_enum::momentum_y, o3 = grid_enum::momentum_z;
+  Real const gamma = 5. / 3.;
+
+  std::vector<Real> conserved(n_cells * grid_enum::num_fields);
+  std::iota(conserved.begin(), conserved.end(), 0.0);
+
+  // Up the energy part of the grid to avoid negative pressure
+  for (size_t i = grid_enum::Energy * n_cells; i < (grid_enum::Energy + 1) * n_cells; i++) {
+    conserved.at(i) *= 5.0E2;
+  }
+
+  // Get test data
+  auto const test_data = reconstruction::Load_Data(conserved.data(), xid, yid, zid, nx, ny, n_cells, o1, o2, o3, gamma);
+
+// Check results
+#ifdef MHD
+  reconstruction::Primitive const fiducial_data{
+      13, 3.0769230769230771, 5.1538461538461542, 7.2307692307692308, 9662.3910256410272, 147.5, 173.5, 197.5};
+  testing_utilities::Check_Results(fiducial_data.density, test_data.density, "density");
+  testing_utilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x");
+  testing_utilities::Check_Results(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y");
+  testing_utilities::Check_Results(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z");
+  testing_utilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure");
+  testing_utilities::Check_Results(fiducial_data.magnetic_x, test_data.magnetic_x, "magnetic_x");
+  testing_utilities::Check_Results(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y");
+  testing_utilities::Check_Results(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z");
+#else  // MHD
+  reconstruction::Primitive fiducial_data{13, 3.0769230769230771, 5.1538461538461542, 7.2307692307692308,
+                                          39950.641025641031};
+  #ifdef DE
+  fiducial_data.pressure = 39950.641025641031;
+  #endif  // DE
+  testing_utilities::Check_Results(fiducial_data.density, test_data.density, "density");
+  testing_utilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x");
+  testing_utilities::Check_Results(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y");
+  testing_utilities::Check_Results(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z");
+  testing_utilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure");
+#endif    // MHD
+}
+
+TEST(tALLReconstructionComputeSlope, CorrectInputExpectCorrectOutput)
+{
+// Setup input data
+#ifdef MHD
+  reconstruction::Primitive left{6, 7, 8, 9, 10, 11, 12, 13};
+  reconstruction::Primitive right{1, 2, 3, 4, 5, 6, 7, 8};
+#else   // MHD
+  reconstruction::Primitive left{6, 7, 8, 9, 10};
+  reconstruction::Primitive right{1, 2, 3, 4, 5};
+#endif  // MHD
+  Real const coef = 0.5;
+
+  // Get test data
+  auto test_data = reconstruction::Compute_Slope(left, right, coef);
+
+  // Check results
+#ifdef MHD
+  Real const fiducial_data = -2.5;
+  testing_utilities::Check_Results(fiducial_data, test_data.density, "density");
+  testing_utilities::Check_Results(fiducial_data, test_data.velocity_x, "velocity_x");
+  testing_utilities::Check_Results(fiducial_data, test_data.velocity_y, "velocity_y");
+  testing_utilities::Check_Results(fiducial_data, test_data.velocity_z, "velocity_z");
+  testing_utilities::Check_Results(fiducial_data, test_data.pressure, "pressure");
+  testing_utilities::Check_Results(fiducial_data, test_data.magnetic_y, "magnetic_y");
+  testing_utilities::Check_Results(fiducial_data, test_data.magnetic_z, "magnetic_z");
+#else   // MHD
+  Real const fiducial_data = -2.5;
+  testing_utilities::Check_Results(fiducial_data, test_data.density, "density");
+  testing_utilities::Check_Results(fiducial_data, test_data.velocity_x, "velocity_x");
+  testing_utilities::Check_Results(fiducial_data, test_data.velocity_y, "velocity_y");
+  testing_utilities::Check_Results(fiducial_data, test_data.velocity_z, "velocity_z");
+  testing_utilities::Check_Results(fiducial_data, test_data.pressure, "pressure");
+#endif  // MHD
+}
+
+TEST(tALLReconstructionVanLeerSlope, CorrectInputExpectCorrectOutput)
+{
+// Setup input data
+#ifdef MHD
+  reconstruction::Primitive left{1, 2, 3, 4, 5, 6, 7, 8};
+  reconstruction::Primitive right{6, 7, 8, 9, 10, 11, 12, 13};
+#else   // MHD
+  reconstruction::Primitive left{1, 2, 3, 4, 5};
+  reconstruction::Primitive right{6, 7, 8, 9, 10};
+#endif  // MHD
+
+  // Get test data
+  auto test_data = reconstruction::Van_Leer_Slope(left, right);
+
+  // Check results
+#ifdef MHD
+  reconstruction::Primitive const fiducial_data{1.7142857142857142, 3.1111111111111112, 4.3636363636363633,
+                                                5.5384615384615383, 6.666666666666667,  0,
+                                                8.8421052631578956, 9.9047619047619051};
+  testing_utilities::Check_Results(fiducial_data.density, test_data.density, "density");
+  testing_utilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x");
+  testing_utilities::Check_Results(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y");
+  testing_utilities::Check_Results(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z");
+  testing_utilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure");
+  testing_utilities::Check_Results(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y");
+  testing_utilities::Check_Results(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z");
+#else   // MHD
+  reconstruction::Primitive const fiducial_data{1.7142857142857142, 3.1111111111111112, 4.3636363636363633,
+                                                5.5384615384615383, 6.666666666666667};
+  testing_utilities::Check_Results(fiducial_data.density, test_data.density, "density");
+  testing_utilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x");
+  testing_utilities::Check_Results(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y");
+  testing_utilities::Check_Results(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z");
+  testing_utilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure");
+#endif  // MHD
+}
+
+__global__ void Test_Monotize_Characteristic_Return_Primitive(
+    reconstruction::Primitive const primitive, reconstruction::Primitive const del_L,
+    reconstruction::Primitive const del_R, reconstruction::Primitive const del_C, reconstruction::Primitive const del_G,
+    reconstruction::Characteristic const del_a_L, reconstruction::Characteristic const del_a_R,
+    reconstruction::Characteristic const del_a_C, reconstruction::Characteristic const del_a_G,
+    reconstruction::EigenVecs const eigenvectors, Real const sound_speed, Real const sound_speed_squared,
+    Real const gamma, reconstruction::Primitive *monotonized_slope)
+{
+  *monotonized_slope = reconstruction::Monotonize_Characteristic_Return_Primitive(
+      primitive, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, eigenvectors, sound_speed,
+      sound_speed_squared, gamma);
+}
+
+TEST(tALLReconstructionMonotonizeCharacteristicReturnPrimitive, CorrectInputExpectCorrectOutput)
+{
+#ifdef MHD
+  reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8};
+  reconstruction::Primitive const del_L{9, 10, 11, 12, 13, 14, 15, 16};
+  reconstruction::Primitive const del_R{17, 18, 19, 20, 21, 22, 23, 24};
+  reconstruction::Primitive const del_C{25, 26, 27, 28, 29, 30, 31, 32};
+  reconstruction::Primitive const del_G{33, 34, 35, 36, 37, 38, 39, 40};
+  reconstruction::Characteristic const del_a_L{41, 42, 43, 44, 45, 46, 47};
+  reconstruction::Characteristic const del_a_R{48, 49, 50, 51, 52, 53, 54};
+  reconstruction::Characteristic const del_a_C{55, 56, 57, 58, 59, 60, 61};
+  reconstruction::Characteristic const del_a_G{62, 64, 65, 66, 67, 68, 69};
+#else   // MHD
+  reconstruction::Primitive const primitive{1, 2, 3, 4, 5};
+  reconstruction::Primitive const del_L{9, 10, 11, 12, 13};
+  reconstruction::Primitive const del_R{17, 18, 19, 20, 21};
+  reconstruction::Primitive const del_C{25, 26, 27, 28, 29};
+  reconstruction::Primitive const del_G{33, 34, 35, 36, 37};
+  reconstruction::Characteristic const del_a_L{41, 42, 43, 44, 45};
+  reconstruction::Characteristic const del_a_R{48, 49, 50, 51, 52};
+  reconstruction::Characteristic const del_a_C{55, 56, 57, 58, 59};
+  reconstruction::Characteristic const del_a_G{62, 64, 65, 66, 67};
+#endif  // MHD
+  Real const sound_speed = 17.0, sound_speed_squared = sound_speed * sound_speed;
+  Real const gamma = 5. / 3.;
+  reconstruction::EigenVecs const eigenvectors{
+      17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
+  };
+
+  // Get test data
+  cuda_utilities::DeviceVector<reconstruction::Primitive> dev_results(1);
+  hipLaunchKernelGGL(Test_Monotize_Characteristic_Return_Primitive, 1, 1, 0, 0, primitive, del_L, del_R, del_C, del_G,
+                     del_a_L, del_a_R, del_a_C, del_a_G, eigenvectors, sound_speed, sound_speed_squared, gamma,
+                     dev_results.data());
+  GPU_Error_Check();
+  cudaDeviceSynchronize();
+  reconstruction::Primitive const host_results = dev_results.at(0);
+
+  // Check results
+#ifdef MHD
+  reconstruction::Primitive const fiducial_data{5046, 2934, -2526, -2828, 1441532, 0.0, -69716, 72152};
+  testing_utilities::Check_Results(fiducial_data.density, host_results.density, "density");
+  testing_utilities::Check_Results(fiducial_data.velocity_x, host_results.velocity_x, "velocity_x");
+  testing_utilities::Check_Results(fiducial_data.velocity_y, host_results.velocity_y, "velocity_y");
+  testing_utilities::Check_Results(fiducial_data.velocity_z, host_results.velocity_z, "velocity_z");
+  testing_utilities::Check_Results(fiducial_data.pressure, host_results.pressure, "pressure");
+  testing_utilities::Check_Results(fiducial_data.magnetic_y, host_results.magnetic_y, "magnetic_y");
+  testing_utilities::Check_Results(fiducial_data.magnetic_z, host_results.magnetic_z, "magnetic_z");
+#else   // MHD
+  reconstruction::Primitive const fiducial_data{170, 68, 57, 58, 32946};
+  testing_utilities::Check_Results(fiducial_data.density, host_results.density, "density");
+  testing_utilities::Check_Results(fiducial_data.velocity_x, host_results.velocity_x, "velocity_x");
+  testing_utilities::Check_Results(fiducial_data.velocity_y, host_results.velocity_y, "velocity_y");
+  testing_utilities::Check_Results(fiducial_data.velocity_z, host_results.velocity_z, "velocity_z");
+  testing_utilities::Check_Results(fiducial_data.pressure, host_results.pressure, "pressure");
+#endif  // MHD
+}
+
+TEST(tHYDROReconstructionMonotizeParabolicInterface, CorrectInputExpectCorrectOutput)
+{
+  // Input Data
+
+  reconstruction::Primitive const cell_i{1.4708046701, 9.5021020181, 3.7123503442, 4.6476103466, 3.7096802847};
+  reconstruction::Primitive const cell_im1{3.9547588941, 3.1552319951, 3.0209247624, 9.5841013261, 2.2945188332};
+  reconstruction::Primitive const cell_ip1{5.1973323534, 6.9132613767, 1.8397298636, 5.341960387, 9.093498542};
+  reconstruction::Primitive interface_L_iph{6.7787324804, 9.5389820358, 9.8522754567, 7.8305142852, 2.450533435};
+  reconstruction::Primitive interface_R_imh{4.8015193892, 5.9124263972, 8.7513040382, 8.3659359773, 1.339777121};
+
+  // Get test data
+  reconstruction::Monotonize_Parabolic_Interface(cell_i, cell_im1, cell_ip1, interface_L_iph, interface_R_imh);
+
+  // Check results
+  reconstruction::Primitive const fiducial_interface_L{1.4708046700999999, 9.5021020181000004, 3.7123503441999999,
+                                                       4.6476103465999996, 3.7096802847000001};
+  reconstruction::Primitive const fiducial_interface_R{1.4708046700999999, 9.428341982700001, 3.7123503441999999,
+                                                       4.6476103465999996, 3.7096802847000001};
+  testing_utilities::Check_Results(fiducial_interface_L.density, interface_L_iph.density, "density");
+  testing_utilities::Check_Results(fiducial_interface_L.velocity_x, interface_L_iph.velocity_x, "velocity_x");
+  testing_utilities::Check_Results(fiducial_interface_L.velocity_y, interface_L_iph.velocity_y, "velocity_y");
+  testing_utilities::Check_Results(fiducial_interface_L.velocity_z, interface_L_iph.velocity_z, "velocity_z");
+  testing_utilities::Check_Results(fiducial_interface_L.pressure, interface_L_iph.pressure, "pressure");
+
+  testing_utilities::Check_Results(fiducial_interface_R.density, interface_R_imh.density, "density");
+  testing_utilities::Check_Results(fiducial_interface_R.velocity_x, interface_R_imh.velocity_x, "velocity_x");
+  testing_utilities::Check_Results(fiducial_interface_R.velocity_y, interface_R_imh.velocity_y, "velocity_y");
+  testing_utilities::Check_Results(fiducial_interface_R.velocity_z, interface_R_imh.velocity_z, "velocity_z");
+  testing_utilities::Check_Results(fiducial_interface_R.pressure, interface_R_imh.pressure, "pressure");
+}
+
+TEST(tALLReconstructionCalcInterfaceLinear, CorrectInputExpectCorrectOutput)
+{
+  // Setup input data
+#ifdef MHD
+  reconstruction::Primitive left{1, 2, 3, 4, 5, 6, 7, 8};
+  reconstruction::Primitive right{6, 7, 8, 9, 10, 11, 12, 13};
+#else   // MHD
+  reconstruction::Primitive left{1, 2, 3, 4, 5};
+  reconstruction::Primitive right{6, 7, 8, 9, 10};
+#endif  // MHD
+  Real const coef = 0.5;
+
+  // Get test data
+  auto test_data = reconstruction::Calc_Interface_Linear(left, right, coef);
+
+  // Check results
+#ifdef MHD
+  reconstruction::Primitive const fiducial_data{2.5, 3.75, 5, 6.25, 7.5, 0, 10, 11.25};
+  testing_utilities::Check_Results(fiducial_data.density, test_data.density, "density");
+  testing_utilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x");
+  testing_utilities::Check_Results(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y");
+  testing_utilities::Check_Results(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z");
+  testing_utilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure");
+  testing_utilities::Check_Results(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y");
+  testing_utilities::Check_Results(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z");
+#else   // MHD
+  reconstruction::Primitive const fiducial_data{2.5, 3.75, 5, 6.25, 7.5};
+  testing_utilities::Check_Results(fiducial_data.density, test_data.density, "density");
+  testing_utilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x");
+  testing_utilities::Check_Results(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y");
+  testing_utilities::Check_Results(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z");
+  testing_utilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure");
+#endif  // MHD
+}
+
+TEST(tALLReconstructionCalcInterfaceParabolic, CorrectInputExpectCorrectOutput)
+{
+  // Setup input data
+#ifdef MHD
+  reconstruction::Primitive cell_i{1, 2, 3, 4, 5, 6, 7, 8};
+  reconstruction::Primitive cell_im1{6, 7, 8, 9, 10, 11, 12, 13};
+  reconstruction::Primitive slopes_i{14, 15, 16, 17, 18, 19, 20, 21};
+  reconstruction::Primitive slopes_im1{22, 23, 24, 25, 26, 27, 28, 29};
+#else   // MHD
+  reconstruction::Primitive cell_i{1, 2, 3, 4, 5};
+  reconstruction::Primitive cell_im1{6, 7, 8, 9, 10};
+  reconstruction::Primitive slopes_i{14, 15, 16, 17, 18};
+  reconstruction::Primitive slopes_im1{22, 23, 24, 25, 26};
+#endif  // MHD
+
+  // Get test data
+  auto test_data = reconstruction::Calc_Interface_Parabolic(cell_i, cell_im1, slopes_i, slopes_im1);
+
+  // Check results
+#ifdef MHD
+  reconstruction::Primitive const fiducial_data{4.833333333333333,  5.833333333333333,  6.833333333333333,
+                                                7.833333333333333,  8.8333333333333339, 0.0,
+                                                10.833333333333334, 11.833333333333334};
+  testing_utilities::Check_Results(fiducial_data.density, test_data.density, "density");
+  testing_utilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x");
+  testing_utilities::Check_Results(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y");
+  testing_utilities::Check_Results(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z");
+  testing_utilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure");
+  testing_utilities::Check_Results(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y");
+  testing_utilities::Check_Results(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z");
+#else   // MHD
+  reconstruction::Primitive const fiducial_data{4.833333333333333, 5.833333333333333, 6.833333333333333,
+                                                7.833333333333333, 8.8333333333333339};
+  testing_utilities::Check_Results(fiducial_data.density, test_data.density, "density");
+  testing_utilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x");
+  testing_utilities::Check_Results(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y");
+  testing_utilities::Check_Results(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z");
+  testing_utilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure");
+#endif  // MHD
+}
+
+TEST(tALLReconstructionPPMSingleVariable, CorrectInputExpectCorrectOutput)
+{
+  // Set up PRNG to use
+  std::mt19937_64 prng(42);
+  std::uniform_real_distribution<double> doubleRand(-100, 100);
+
+  // Set up testing parameters
+  size_t const n_tests = 100;
+  std::vector<double> input_data(n_tests * 5);
+  for (double &val : input_data) {
+    val = doubleRand(prng);
+  }
+
+  std::vector<double> fiducial_left_interface{
+      50.429040149605328,  -40.625142952817804, 37.054257344499717,  -55.796322960572695, -14.949021655598202,
+      -10.760611497035882, 71.107183338735751,  -29.453314279116661, 7.38606168778702,    -23.210826670297152,
+      -85.15197822983292,  18.98804944849401,   64.754272117396766,  4.5584678980835918,  45.81912726561103,
+      58.769584663215738,  47.626531326553447,  9.3792919223901166,  47.06767164062336,   -53.975231802858218,
+      -81.51278133300454,  -74.554960772880221, 96.420244795844823,  37.498528618937456,  -41.370881014041672,
+      -41.817524439980467, 58.391560533135817,  -85.991024651293131, -12.674113472365306, 30.421304081280084,
+      43.700175645941769,  58.342347077360131,  -31.574197692184548, 98.151410701129635,  -9.4994975790183389,
+      -87.49117921577357,  -94.449608348937488, 79.849643090061676,  93.096197902468759,  -64.374502025066192,
+      82.037247010307937,  -60.629868182203786, -41.343090531127039, -75.449850543801574, -82.52313028208863,
+      19.871484181185011,  -22.253989777496159, 86.943333900988137,  -83.887344220269938, 73.270857190511975,
+      84.784625452008811,  -27.929776508530765, -9.6992610428405612, -65.233676045197072, -88.498474065470134,
+      47.637114710282589,  -69.50911815749248,  -69.848254012650372, -7.4520009269431711, 90.887158278825865,
+      -50.671539065300863, 13.424189957034622,  80.237684918029572,  32.454734198410179,  66.84741286999801,
+      24.53669768915492,   -67.195147776790975, 72.277527112459907,  -46.094192444366435, -99.915875366345205,
+      32.244024128018054,  -95.648868731550635, 17.922876720365402,  -86.334093878928797, -16.580223524066724,
+      39.48244113577249,   64.203567686297504,  23.62791013796798,   59.620571575902432,  41.0983082454959,
+      -30.533954819557593, -23.149979553301478, -54.098849622102691, -45.577469823900444, 33.284499908516068,
+      -39.186662569988762, 76.266375356625161,  -51.650172854435624, -68.894636301310584, 98.410134045837452,
+      -49.167117951549066, 78.440749922366507,  51.390453104722326,  3.1993391287610393,  43.749856317813453,
+      -81.399433434996496, 88.385686355761862,  78.242223440453444,  27.539590130937498,  -6.9781781598207147,
+  };
+  std::vector<double> fiducial_right_interface{
+      50.429040149605328,  4.4043935241855703,  37.054257344499717,  23.707343328192596,  -14.949021655598202,
+      -10.760611497035882, 8.367260859616664,   8.5357943668839624,  7.38606168778702,    -23.210826670297152,
+      -85.15197822983292,  18.98804944849401,   64.754272117396766,  4.5584678980835918,  45.81912726561103,
+      58.769584663215738,  47.626531326553447,  23.370742401854159,  47.06767164062336,   -53.975231802858218,
+      -81.51278133300454,  -74.554960772880221, 75.572387546643355,  61.339053128914685,  -41.370881014041672,
+      -41.817524439980467, 58.391560533135817,  -85.991024651293131, -36.626332669233776, 30.421304081280084,
+      20.637382412674096,  58.342347077360131,  -79.757902483702381, 98.151410701129635,  -9.4994975790183389,
+      -87.49117921577357,  -39.384192078363533, 79.849643090061676,  93.096197902468759,  -64.374502025066192,
+      82.037247010307937,  -20.951323678824952, 46.927431599533087,  -75.449850543801574, -54.603894223278004,
+      -59.419110050353098, -22.253989777496159, 86.943333900988137,  -83.887344220269938, 73.270857190511975,
+      84.784625452008811,  -27.929776508530765, -9.6992610428405612, -65.233676045197072, -88.498474065470134,
+      47.637114710282589,  -69.50911815749248,  -69.848254012650372, -7.4520009269431711, 90.887158278825865,
+      -79.086012597191512, -45.713537271527976, 80.237684918029572,  -60.666381661910016, 68.727158732184449,
+      24.53669768915492,   -67.195147776790975, 72.610434112023597,  54.910597945673814,  -19.862686571231023,
+      32.244024128018054,  -95.648868731550635, -34.761757909478987, -86.334093878928797, -16.580223524066724,
+      39.48244113577249,   64.203567686297504,  0.77846541072490538, 59.620571575902432,  41.0983082454959,
+      -2.6491435658297036, -23.149979553301478, -54.098849622102691, -45.577469823900444, 33.284499908516068,
+      -39.186662569988762, 76.266375356625161,  -51.650172854435624, -68.894636301310584, 98.410134045837452,
+      30.9954824410611,    78.440749922366507,  51.390453104722326,  70.625792807373429,  43.749856317813453,
+      -81.399433434996496, 88.385686355761862,  78.242223440453444,  27.539590130937498,  -6.9781781598207147,
+  };
+
+  // Run n_tests iterations of the loop choosing random numbers to put into the interface state computation and checking
+  // the results
+  for (size_t i = 0; i < n_tests; i++) {
+    // Run the function
+    double test_left_interface, test_right_interface;
+    size_t const idx = 5 * i;
+    reconstruction::PPM_Single_Variable(input_data[idx], input_data[idx + 1], input_data[idx + 2], input_data[idx + 3],
+                                        input_data[idx + 4], test_left_interface, test_right_interface);
+
+    // Compare results
+    testing_utilities::Check_Results(fiducial_left_interface.at(i), test_left_interface, "left i+1/2 interface");
+    testing_utilities::Check_Results(fiducial_right_interface.at(i), test_right_interface, "right i-1/2 interface");
+  }
+}
+
+TEST(tALLReconstructionWriteData, CorrectInputExpectCorrectOutput)
+{
+  // Set up test and mock up grid
+#ifdef MHD
+  reconstruction::Primitive interface{1, 2, 3, 4, 5, 6, 7, 8};
+#else   // MHD
+  reconstruction::Primitive interface{6, 7, 8, 9, 10};
+#endif  // MHD
+  size_t const nx = 3, ny = 3, nz = 3;
+  size_t const n_cells = nx * ny * nz;
+  size_t const xid = 1, yid = 1, zid = 1;
+  size_t const id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny);
+  size_t const o1 = grid_enum::momentum_x, o2 = grid_enum::momentum_y, o3 = grid_enum::momentum_z;
+  Real const gamma = 5. / 3.;
+
+  std::vector<Real> conserved(n_cells * grid_enum::num_fields);
+  std::vector<Real> interface_arr(n_cells * grid_enum::num_fields);
+
+  // Get test data
+  reconstruction::Write_Data(interface, interface_arr.data(), conserved.data(), id, n_cells, o1, o2, o3, gamma);
+
+// Fiducial Data
+#ifdef MHD
+  std::unordered_map<int, double> fiducial_interface = {{13, 1},     {40, 2},  {67, 3}, {94, 4},
+                                                        {121, 78.5}, {148, 7}, {175, 8}};
+#else   // MHD
+  std::unordered_map<int, double> fiducial_interface = {{13, 6}, {40, 42}, {67, 48}, {94, 54}, {121, 597}};
+#endif  // MHD
+
+  // Perform Comparison
+  for (size_t i = 0; i < interface_arr.size(); i++) {
+    // Check the interface
+    double test_val     = interface_arr.at(i);
+    double fiducial_val = (fiducial_interface.find(i) == fiducial_interface.end()) ? 0.0 : fiducial_interface[i];
+
+    testing_utilities::Check_Results(fiducial_val, test_val, "Interface at i=" + std::to_string(i));
+  }
+}
+
+TEST(tHYDROReconstructionPlmLimitInterfaces, CorrectInputExpectCorrectOutput)
+{
+  // Set up values to test
+  reconstruction::Primitive interface_l_iph, interface_r_imh;
+  reconstruction::Primitive cell_im1, cell_i, cell_ip1;
+  interface_r_imh.density    = -1.94432878387898625e+14;
+  interface_r_imh.velocity_x = 1.42049955114756404e-04;
+  interface_r_imh.velocity_y = -2.61311412306644180e-06;
+  interface_r_imh.velocity_z = -1.99429361865204601e-07;
+  interface_r_imh.pressure   = -2.01130121665840250e-14;
+  interface_l_iph.density    = 1.94433200621991188e+14;
+  interface_l_iph.velocity_x = 1.42025407335853601e-04;
+  interface_l_iph.velocity_y = -2.61311412306644180e-06;
+  interface_l_iph.velocity_z = -6.01154878659959398e-06;
+  interface_l_iph.pressure   = 2.01130321665840277e-14;
+
+  cell_im1.density    = 1.61101072114153951e+08;
+  cell_i.density      = 1.61117046279133737e+08;
+  cell_ip1.density    = 1.61011252191243321e+08;
+  cell_im1.velocity_x = 1.42067642369120116e-04;
+  cell_i.velocity_x   = 1.42037681225305003e-04;
+  cell_ip1.velocity_x = 1.41901817571928041e-04;
+  cell_im1.velocity_y = -2.61228250783092252e-06;
+  cell_i.velocity_y   = -2.61311412306644180e-06;
+  cell_ip1.velocity_y = -2.61155204131260820e-06;
+  cell_im1.velocity_z = 2.71420653365757378e-06;
+  cell_i.velocity_z   = -3.10548907423239929e-06;
+  cell_ip1.velocity_z = -8.91005201578514336e-06;
+  cell_im1.pressure   = 9.99999999999999945e-21;
+  cell_i.pressure     = 9.99999999999999945e-21;
+  cell_ip1.pressure   = 4.70262856027679407e-03;
+
+  // Set fiducial values
+  reconstruction::Primitive interface_r_imh_fiducial, interface_l_iph_fiducial;
+  interface_r_imh_fiducial.density    = 161101072.11415395;
+  interface_r_imh_fiducial.velocity_x = 1.42049955114756404e-04;
+  interface_r_imh_fiducial.velocity_y = -2.61311412306644180e-06;
+  interface_r_imh_fiducial.velocity_z = -1.99429361865204601e-07;
+  interface_r_imh_fiducial.pressure   = 9.99999999999999945e-21;
+  interface_l_iph_fiducial.density    = 1.61117046279133737e+08;
+  interface_l_iph_fiducial.velocity_x = 1.42025407335853601e-04;
+  interface_l_iph_fiducial.velocity_y = -2.61311412306644180e-06;
+  interface_l_iph_fiducial.velocity_z = -6.01154878659959398e-06;
+  interface_l_iph_fiducial.pressure   = 2.0113032166584028e-14;
+
+  // Run function
+  reconstruction::Plm_Limit_Interfaces(interface_l_iph, interface_r_imh, cell_im1, cell_i, cell_ip1);
+
+  // Check values
+  testing_utilities::Check_Results(interface_l_iph_fiducial.density, interface_l_iph.density,
+                                   "Mismatch in l_iph density");
+  testing_utilities::Check_Results(interface_l_iph_fiducial.velocity_x, interface_l_iph.velocity_x,
+                                   "Mismatch in l_iph velocity_x");
+  testing_utilities::Check_Results(interface_l_iph_fiducial.velocity_y, interface_l_iph.velocity_y,
+                                   "Mismatch in l_iph velocity_y");
+  testing_utilities::Check_Results(interface_l_iph_fiducial.velocity_z, interface_l_iph.velocity_z,
+                                   "Mismatch in l_iph velocity_z");
+  testing_utilities::Check_Results(interface_l_iph_fiducial.pressure, interface_l_iph.pressure,
+                                   "Mismatch in l_iph pressure");
+  testing_utilities::Check_Results(interface_r_imh_fiducial.density, interface_r_imh.density,
+                                   "Mismatch in r_imh density");
+  testing_utilities::Check_Results(interface_r_imh_fiducial.velocity_x, interface_r_imh.velocity_x,
+                                   "Mismatch in r_imh velocity_x");
+  testing_utilities::Check_Results(interface_r_imh_fiducial.velocity_y, interface_r_imh.velocity_y,
+                                   "Mismatch in r_imh velocity_y");
+  testing_utilities::Check_Results(interface_r_imh_fiducial.velocity_z, interface_r_imh.velocity_z,
+                                   "Mismatch in r_imh velocity_z");
+  testing_utilities::Check_Results(interface_r_imh_fiducial.pressure, interface_r_imh.pressure,
+                                   "Mismatch in r_imh pressure");
+}
diff --git a/src/riemann_solvers/exact_cuda.cu b/src/riemann_solvers/exact_cuda.cu
index d84464828..9e0a4cff2 100644
--- a/src/riemann_solvers/exact_cuda.cu
+++ b/src/riemann_solvers/exact_cuda.cu
@@ -1,114 +1,119 @@
 /*! \file exact_cuda.cu
  *  \brief Function definitions for the cuda exact Riemann solver.*/
 
-#ifdef CUDA
-
-#include "../utils/gpu.hpp"
 #include <math.h>
 #include <stdio.h>
+
 #include "../global/global.h"
 #include "../global/global_cuda.h"
 #include "../riemann_solvers/exact_cuda.h"
+#include "../utils/gpu.hpp"
 
-#ifdef DE //PRESSURE_DE
-#include "../utils/hydro_utilities.h"
+#ifdef DE  // PRESSURE_DE
+  #include "../utils/hydro_utilities.h"
 #endif
 
-
-
-/*! \fn Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields)
- *  \brief Exact Riemann solver based on the Fortran code given in Sec. 4.9 of Toro (1999). */
-__global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields)
+/*! \fn Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real
+ * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int
+ * n_fields) \brief Exact Riemann solver based on the Fortran code given in
+ * Sec. 4.9 of Toro (1999). */
+__global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny,
+                                            int nz, int n_ghost, Real gamma, int dir, int n_fields)
 {
   // get a thread index
-  int blockId = blockIdx.x + blockIdx.y*gridDim.x;
-  int tid = threadIdx.x + blockId * blockDim.x;
-  int zid = tid / (nx*ny);
-  int yid = (tid - zid*nx*ny) / nx;
-  int xid = tid - zid*nx*ny - yid*nx;
+  int blockId = blockIdx.x + blockIdx.y * gridDim.x;
+  int tid     = threadIdx.x + blockId * blockDim.x;
+  int zid     = tid / (nx * ny);
+  int yid     = (tid - zid * nx * ny) / nx;
+  int xid     = tid - zid * nx * ny - yid * nx;
 
-  int n_cells = nx*ny*nz;
+  int n_cells = nx * ny * nz;
   int o1, o2, o3;
   if (dir == 0) {
-    o1 = 1; o2 = 2; o3 = 3;
+    o1 = 1;
+    o2 = 2;
+    o3 = 3;
   }
   if (dir == 1) {
-    o1 = 2; o2 = 3; o3 = 1;
+    o1 = 2;
+    o2 = 3;
+    o3 = 1;
   }
   if (dir == 2) {
-    o1 = 3; o2 = 1; o3 = 2;
+    o1 = 3;
+    o2 = 1;
+    o3 = 2;
   }
 
-  Real dl, vxl, vyl, vzl, pl, cl; //density, velocity, pressure, sound speed (left)
-  Real dr, vxr, vyr, vzr, pr, cr; //density, velocity, pressure, sound speed (right)
-  Real ds, vs, ps, Es; //sample_CUDAd density, velocity, pressure, total energy
-  Real vm, pm; //velocity and pressure in the star region
+  Real dl, vxl, vyl, vzl, pl,
+      cl;  // density, velocity, pressure, sound speed (left)
+  Real dr, vxr, vyr, vzr, pr,
+      cr;               // density, velocity, pressure, sound speed (right)
+  Real ds, vs, ps, Es;  // sample_CUDAd density, velocity, pressure, total
+                        // energy
+  Real vm, pm;          // velocity and pressure in the star region
 
-  #ifdef DE
-  Real gel, ger, E_kin, E, dge ;
-  #endif
+#ifdef DE
+  Real gel, ger, E_kin, E, dge;
+#endif
 
-  #ifdef SCALAR
+#ifdef SCALAR
   Real scalarl[NSCALARS], scalarr[NSCALARS];
-  #endif
-
+#endif
 
   // Each thread executes the solver independently
-  //if (xid > n_ghost-3 && xid < nx-n_ghost+1 && yid < ny && zid < nz)
-  if (xid < nx && yid < ny && zid < nz)
-  {
+  // if (xid > n_ghost-3 && xid < nx-n_ghost+1 && yid < ny && zid < nz)
+  if (xid < nx && yid < ny && zid < nz) {
     // retrieve primitive variables
-    dl  = dev_bounds_L[            tid];
-    vxl = dev_bounds_L[o1*n_cells + tid]/dl;
-    vyl = dev_bounds_L[o2*n_cells + tid]/dl;
-    vzl = dev_bounds_L[o3*n_cells + tid]/dl;
-    #ifdef DE //PRESSURE_DE
-    E = dev_bounds_L[4*n_cells + tid];
-    E_kin = 0.5 * dl * ( vxl*vxl + vyl*vyl + vzl*vzl );
-    dge = dev_bounds_L[(n_fields-1)*n_cells + tid];
-    pl = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma );
-    #else
-    pl  = (dev_bounds_L[4*n_cells + tid] - 0.5*dl*(vxl*vxl + vyl*vyl + vzl*vzl)) * (gamma - 1.0);
-    #endif //PRESSURE_DE
-    pl  = fmax(pl, (Real) TINY_NUMBER);
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalarl[i] = dev_bounds_L[(5+i)*n_cells + tid]/dl;
+    dl  = dev_bounds_L[tid];
+    vxl = dev_bounds_L[o1 * n_cells + tid] / dl;
+    vyl = dev_bounds_L[o2 * n_cells + tid] / dl;
+    vzl = dev_bounds_L[o3 * n_cells + tid] / dl;
+#ifdef DE  // PRESSURE_DE
+    E     = dev_bounds_L[4 * n_cells + tid];
+    E_kin = 0.5 * dl * (vxl * vxl + vyl * vyl + vzl * vzl);
+    dge   = dev_bounds_L[(n_fields - 1) * n_cells + tid];
+    pl    = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma);
+#else
+    pl = (dev_bounds_L[4 * n_cells + tid] - 0.5 * dl * (vxl * vxl + vyl * vyl + vzl * vzl)) * (gamma - 1.0);
+#endif  // PRESSURE_DE
+    pl = fmax(pl, (Real)TINY_NUMBER);
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      scalarl[i] = dev_bounds_L[(5 + i) * n_cells + tid] / dl;
     }
-    #endif
-    #ifdef DE
+#endif
+#ifdef DE
     gel = dge / dl;
-    #endif
-    dr  = dev_bounds_R[            tid];
-    vxr = dev_bounds_R[o1*n_cells + tid]/dr;
-    vyr = dev_bounds_R[o2*n_cells + tid]/dr;
-    vzr = dev_bounds_R[o3*n_cells + tid]/dr;
-    #ifdef DE //PRESSURE_DE
-    E = dev_bounds_R[4*n_cells + tid];
-    E_kin = 0.5 * dr * ( vxr*vxr + vyr*vyr + vzr*vzr );
-    dge = dev_bounds_R[(n_fields-1)*n_cells + tid];
-    pr = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma );
-    #else
-    pr  = (dev_bounds_R[4*n_cells + tid] - 0.5*dr*(vxr*vxr + vyr*vyr + vzr*vzr)) * (gamma - 1.0);
-    #endif //PRESSURE_DE
-    pr  = fmax(pr, (Real) TINY_NUMBER);
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      scalarr[i] = dev_bounds_R[(5+i)*n_cells + tid]/dr;
+#endif
+    dr  = dev_bounds_R[tid];
+    vxr = dev_bounds_R[o1 * n_cells + tid] / dr;
+    vyr = dev_bounds_R[o2 * n_cells + tid] / dr;
+    vzr = dev_bounds_R[o3 * n_cells + tid] / dr;
+#ifdef DE  // PRESSURE_DE
+    E     = dev_bounds_R[4 * n_cells + tid];
+    E_kin = 0.5 * dr * (vxr * vxr + vyr * vyr + vzr * vzr);
+    dge   = dev_bounds_R[(n_fields - 1) * n_cells + tid];
+    pr    = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma);
+#else
+    pr = (dev_bounds_R[4 * n_cells + tid] - 0.5 * dr * (vxr * vxr + vyr * vyr + vzr * vzr)) * (gamma - 1.0);
+#endif  // PRESSURE_DE
+    pr = fmax(pr, (Real)TINY_NUMBER);
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      scalarr[i] = dev_bounds_R[(5 + i) * n_cells + tid] / dr;
     }
-    #endif
-    #ifdef DE
+#endif
+#ifdef DE
     ger = dge / dr;
-    #endif
-
+#endif
 
     // compute sounds speeds in left and right regions
     cl = sqrt(gamma * pl / dl);
     cr = sqrt(gamma * pr / dr);
 
     // test for the pressure positivity condition
-    if ((2.0 / (gamma - 1.0)) * (cl+cr) <= (vxr-vxl))
-    {
+    if ((2.0 / (gamma - 1.0)) * (cl + cr) <= (vxr - vxl)) {
       // the initial data is such that vacuum is generated
       printf("Vacuum is generated by the initial data.\n");
       printf("%f %f %f %f %f %f\n", dl, vxl, pl, dr, vxr, pr);
@@ -117,46 +122,41 @@ __global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds
     // Find the exact solution for pressure and velocity in the star region
     starpv_CUDA(&pm, &vm, dl, vxl, pl, cl, dr, vxr, pr, cr, gamma);
 
-    //sample_CUDA the solution at the cell interface
+    // sample_CUDA the solution at the cell interface
     sample_CUDA(pm, vm, &ds, &vs, &ps, dl, vxl, pl, cl, dr, vxr, pr, cr, gamma);
 
     // calculate the fluxes through the cell interface
-    dev_flux[tid] = ds*vs;
-    dev_flux[o1*n_cells + tid] = ds*vs*vs+ps;
-    if (vs >= 0)
-    {
-      dev_flux[o2*n_cells + tid] = ds*vs*vyl;
-      dev_flux[o3*n_cells + tid] = ds*vs*vzl;
-      #ifdef SCALAR
-      for (int i=0; i<NSCALARS; i++) {
-        dev_flux[(5+i)*n_cells + tid] = ds*vs*scalarl[i];
+    dev_flux[tid]                = ds * vs;
+    dev_flux[o1 * n_cells + tid] = ds * vs * vs + ps;
+    if (vs >= 0) {
+      dev_flux[o2 * n_cells + tid] = ds * vs * vyl;
+      dev_flux[o3 * n_cells + tid] = ds * vs * vzl;
+#ifdef SCALAR
+      for (int i = 0; i < NSCALARS; i++) {
+        dev_flux[(5 + i) * n_cells + tid] = ds * vs * scalarl[i];
       }
-      #endif
-      #ifdef DE
-      dev_flux[(n_fields-1)*n_cells + tid] = ds*vs*gel;
-      #endif
-      Es = (ps/(gamma - 1.0)) + 0.5*ds*(vs*vs + vyl*vyl + vzl*vzl);
-    }
-    else
-    {
-      dev_flux[o2*n_cells + tid] = ds*vs*vyr;
-      dev_flux[o3*n_cells + tid] = ds*vs*vzr;
-      #ifdef SCALAR
-      for (int i=0; i<NSCALARS; i++) {
-        dev_flux[(5+i)*n_cells + tid] = ds*vs*scalarr[i];
+#endif
+#ifdef DE
+      dev_flux[(n_fields - 1) * n_cells + tid] = ds * vs * gel;
+#endif
+      Es = (ps / (gamma - 1.0)) + 0.5 * ds * (vs * vs + vyl * vyl + vzl * vzl);
+    } else {
+      dev_flux[o2 * n_cells + tid] = ds * vs * vyr;
+      dev_flux[o3 * n_cells + tid] = ds * vs * vzr;
+#ifdef SCALAR
+      for (int i = 0; i < NSCALARS; i++) {
+        dev_flux[(5 + i) * n_cells + tid] = ds * vs * scalarr[i];
       }
-      #endif
-      #ifdef DE
-      dev_flux[(n_fields-1)*n_cells + tid] = ds*vs*ger;
-      #endif
-      Es = (ps/(gamma - 1.0)) + 0.5*ds*(vs*vs + vyr*vyr + vzr*vzr);
+#endif
+#ifdef DE
+      dev_flux[(n_fields - 1) * n_cells + tid] = ds * vs * ger;
+#endif
+      Es = (ps / (gamma - 1.0)) + 0.5 * ds * (vs * vs + vyr * vyr + vzr * vzr);
     }
-    dev_flux[4*n_cells + tid] = (Es+ps)*vs;
+    dev_flux[4 * n_cells + tid] = (Es + ps) * vs;
   }
-
 }
 
-
 __device__ Real guessp_CUDA(Real dl, Real vxl, Real pl, Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma)
 {
   // purpose:  to provide a guessed value for pressure
@@ -169,20 +169,23 @@ __device__ Real guessp_CUDA(Real dl, Real vxl, Real pl, Real cl, Real dr, Real v
   const Real TOL = 1.0e-6;
 
   // compute guess pressure from PVRS Riemann solver
-  ppv = 0.5*(pl + pr) + 0.125*(vxl - vxr)*(dl + dr)*(cl + cr);
+  ppv = 0.5 * (pl + pr) + 0.125 * (vxl - vxr) * (dl + dr) * (cl + cr);
 
-  if (ppv < 0.0) ppv = 0.0;
+  if (ppv < 0.0) {
+    ppv = 0.0;
+  }
   // Two-Shock Riemann solver with PVRS as estimate
-  gl = sqrt((2.0 / ((gamma + 1.0)*dl))/(((gamma - 1.0) / (gamma + 1.0))*pl + ppv));
-  gr = sqrt((2.0 / ((gamma + 1.0)*dr))/(((gamma - 1.0) / (gamma + 1.0))*pr + ppv));
-  pm = (gl*pl + gr*pr - (vxr-vxl))/(gl + gr);
+  gl = sqrt((2.0 / ((gamma + 1.0) * dl)) / (((gamma - 1.0) / (gamma + 1.0)) * pl + ppv));
+  gr = sqrt((2.0 / ((gamma + 1.0) * dr)) / (((gamma - 1.0) / (gamma + 1.0)) * pr + ppv));
+  pm = (gl * pl + gr * pr - (vxr - vxl)) / (gl + gr);
 
-  if (pm < 0.0) pm = TOL;
+  if (pm < 0.0) {
+    pm = TOL;
+  }
 
   return pm;
 }
 
-
 __device__ void prefun_CUDA(Real *f, Real *fd, Real p, Real dk, Real pk, Real ck, Real gamma)
 {
   // purpose:  to evaluate the pressure functions
@@ -193,56 +196,56 @@ __device__ void prefun_CUDA(Real *f, Real *fd, Real p, Real dk, Real pk, Real ck
 
   if (p <= pk) {
     // rarefaction wave
-    *f = (2.0 / (gamma - 1.0))*ck*(powf(p/pk, (gamma - 1.0)/(2.0 * gamma)) - 1.0);
-    *fd = (1.0/(dk*ck))*powf((p/pk), -((gamma + 1.0)/(2.0 * gamma)));
-  }
-  else
-  {
+    *f  = (2.0 / (gamma - 1.0)) * ck * (powf(p / pk, (gamma - 1.0) / (2.0 * gamma)) - 1.0);
+    *fd = (1.0 / (dk * ck)) * powf((p / pk), -((gamma + 1.0) / (2.0 * gamma)));
+  } else {
     // shock wave
-    qrt = sqrt(((2.0 / (gamma + 1.0)) / dk)/((((gamma - 1.0) / (gamma + 1.0)) * pk) + p));
-    *f = (p - pk)*qrt;
-    *fd = (1.0 - 0.5*(p - pk)/((((gamma - 1.0) / (gamma + 1.0)) * pk) + p))*qrt;
+    qrt = sqrt(((2.0 / (gamma + 1.0)) / dk) / ((((gamma - 1.0) / (gamma + 1.0)) * pk) + p));
+    *f  = (p - pk) * qrt;
+    *fd = (1.0 - 0.5 * (p - pk) / ((((gamma - 1.0) / (gamma + 1.0)) * pk) + p)) * qrt;
   }
 }
 
-
-__device__ void starpv_CUDA(Real *p, Real *v, Real dl, Real vxl, Real pl, Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma)
+__device__ void starpv_CUDA(Real *p, Real *v, Real dl, Real vxl, Real pl, Real cl, Real dr, Real vxr, Real pr, Real cr,
+                            Real gamma)
 {
   // purpose:  Uses Newton-Raphson iteration
   // to compute the solution for pressure and
   // velocity in the Star Region
 
   const int nriter = 20;
-  const Real TOL = 1.0e-6;
+  const Real TOL   = 1.0e-6;
   Real change, fl, fld, fr, frd, pold, pstart;
 
-  //guessed value pstart is computed
+  // guessed value pstart is computed
   pstart = guessp_CUDA(dl, vxl, pl, cl, dr, vxr, pr, cr, gamma);
-  pold = pstart;
+  pold   = pstart;
 
   int i = 0;
-  for (i=0 ; i <= nriter; i++) {
+  for (i = 0; i <= nriter; i++) {
     prefun_CUDA(&fl, &fld, pold, dl, pl, cl, gamma);
     prefun_CUDA(&fr, &frd, pold, dr, pr, cr, gamma);
-    *p = pold - (fl + fr + vxr - vxl)/(fld + frd);
-    change = 2.0*fabs((*p - pold)/(*p + pold));
+    *p     = pold - (fl + fr + vxr - vxl) / (fld + frd);
+    change = 2.0 * fabs((*p - pold) / (*p + pold));
 
-    if (change <= TOL) break;
-    if (*p < 0.0) *p = TOL;
+    if (change <= TOL) {
+      break;
+    }
+    if (*p < 0.0) {
+      *p = TOL;
+    }
     pold = *p;
   }
   if (i > nriter) {
-    //printf("Divergence in Newton-Raphson iteration. p = %e\n", *p);
+    // printf("Divergence in Newton-Raphson iteration. p = %e\n", *p);
   }
 
   // compute velocity in star region
-  *v = 0.5*(vxl + vxr + fr - fl);
-
+  *v = 0.5 * (vxl + vxr + fr - fl);
 }
 
-
-__device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, Real *p,
-      Real dl, Real vxl, Real pl, Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma)
+__device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, Real *p, Real dl, Real vxl, Real pl,
+                            Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma)
 {
   // purpose:  to sample the solution throughout the wave
   //   pattern. Pressure pm and velocity vm in the
@@ -251,96 +254,81 @@ __device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, Real
 
   Real c, sl, sr;
 
-  if (vm >= 0) // sampling point lies to the left of the contact discontinuity
+  if (vm >= 0)  // sampling point lies to the left of the contact discontinuity
   {
-    if (pm <= pl) // left rarefaction
+    if (pm <= pl)  // left rarefaction
     {
-      if (vxl - cl >= 0) // sampled point is in left data state
+      if (vxl - cl >= 0)  // sampled point is in left data state
       {
         *d = dl;
         *v = vxl;
         *p = pl;
-      }
-      else
-      {
-        if (vm - cl*powf(pm/pl, (gamma - 1.0)/(2.0 * gamma)) < 0) // sampled point is in star left state
+      } else {
+        if (vm - cl * powf(pm / pl, (gamma - 1.0) / (2.0 * gamma)) < 0)  // sampled point is in star left state
         {
-          *d = dl*powf(pm/pl, 1.0/gamma);
+          *d = dl * powf(pm / pl, 1.0 / gamma);
           *v = vm;
           *p = pm;
-        }
-        else // sampled point is inside left fan
+        } else  // sampled point is inside left fan
         {
-          c = (2.0 / (gamma + 1.0))*(cl + ((gamma - 1.0) / 2.0)*vxl);
+          c  = (2.0 / (gamma + 1.0)) * (cl + ((gamma - 1.0) / 2.0) * vxl);
           *v = c;
-          *d = dl*powf(c/cl, 2.0 / (gamma - 1.0));
-          *p = pl*powf(c/cl, 2.0 * gamma / (gamma - 1.0));
+          *d = dl * powf(c / cl, 2.0 / (gamma - 1.0));
+          *p = pl * powf(c / cl, 2.0 * gamma / (gamma - 1.0));
         }
       }
-    }
-    else // left shock
+    } else  // left shock
     {
-      sl = vxl - cl*sqrt(((gamma + 1.0)/(2.0 * gamma))*(pm/pl) + ((gamma - 1.0)/(2.0 * gamma)));
-      if (sl >= 0) // sampled point is in left data state
+      sl = vxl - cl * sqrt(((gamma + 1.0) / (2.0 * gamma)) * (pm / pl) + ((gamma - 1.0) / (2.0 * gamma)));
+      if (sl >= 0)  // sampled point is in left data state
       {
         *d = dl;
         *v = vxl;
         *p = pl;
-      }
-      else // sampled point is in star left state
+      } else  // sampled point is in star left state
       {
-        *d = dl*(pm/pl + ((gamma - 1.0) / (gamma + 1.0)))/((pm/pl)*((gamma - 1.0) / (gamma + 1.0)) + 1.0);
+        *d = dl * (pm / pl + ((gamma - 1.0) / (gamma + 1.0))) / ((pm / pl) * ((gamma - 1.0) / (gamma + 1.0)) + 1.0);
         *v = vm;
         *p = pm;
       }
     }
-  }
-  else // sampling point lies to the right of the contact discontinuity
+  } else  // sampling point lies to the right of the contact discontinuity
   {
-    if (pm > pr) // right shock
+    if (pm > pr)  // right shock
     {
-      sr = vxr + cr*sqrt(((gamma + 1.0)/(2.0 * gamma))*(pm/pr) + ((gamma - 1.0)/(2.0 * gamma)));
-      if (sr <= 0) // sampled point is in right data state
+      sr = vxr + cr * sqrt(((gamma + 1.0) / (2.0 * gamma)) * (pm / pr) + ((gamma - 1.0) / (2.0 * gamma)));
+      if (sr <= 0)  // sampled point is in right data state
       {
         *d = dr;
         *v = vxr;
         *p = pr;
-      }
-      else // sampled point is in star right state
+      } else  // sampled point is in star right state
       {
-        *d = dr*(pm/pr + ((gamma - 1.0) / (gamma + 1.0)))/((pm/pr)*((gamma - 1.0) / (gamma + 1.0)) + 1.0);
+        *d = dr * (pm / pr + ((gamma - 1.0) / (gamma + 1.0))) / ((pm / pr) * ((gamma - 1.0) / (gamma + 1.0)) + 1.0);
         *v = vm;
         *p = pm;
       }
-    }
-    else // right rarefaction
+    } else  // right rarefaction
     {
-      if (vxr + cr <= 0) // sampled point is in right data state
+      if (vxr + cr <= 0)  // sampled point is in right data state
       {
         *d = dr;
         *v = vxr;
         *p = pr;
-      }
-      else
-      {
-        if (vm + cr*powf(pm/pr, (gamma - 1.0)/(2.0 * gamma)) >= 0) // sampled point is in star right state
+      } else {
+        if (vm + cr * powf(pm / pr, (gamma - 1.0) / (2.0 * gamma)) >= 0)  // sampled point is in star right state
         {
-          *d = dr*powf(pm/pr, (1.0/gamma));
+          *d = dr * powf(pm / pr, (1.0 / gamma));
           *v = vm;
           *p = pm;
-        }
-        else // sampled point is inside right fan
+        } else  // sampled point is inside right fan
         {
-          c = (2.0 / (gamma + 1.0))*(cr - ((gamma - 1.0) / 2.0)*vxr);
+          c  = (2.0 / (gamma + 1.0)) * (cr - ((gamma - 1.0) / 2.0) * vxr);
           *v = -c;
-          *d = dr*powf(c/cr, 2.0 / (gamma - 1.0));
-          *p = pr*powf(c/cr, 2.0 * gamma / (gamma - 1.0));
+          *d = dr * powf(c / cr, 2.0 / (gamma - 1.0));
+          *p = pr * powf(c / cr, 2.0 * gamma / (gamma - 1.0));
         }
       }
     }
   }
 }
-
-
-
-#endif //CUDA
diff --git a/src/riemann_solvers/exact_cuda.h b/src/riemann_solvers/exact_cuda.h
index 4d6d1f3d6..4cb004fb5 100644
--- a/src/riemann_solvers/exact_cuda.h
+++ b/src/riemann_solvers/exact_cuda.h
@@ -1,27 +1,27 @@
 /*! \file exact_cuda.h
- *  \brief Declarations of functions for the cuda exact riemann solver kernel. */
-
-#ifdef CUDA
+ *  \brief Declarations of functions for the cuda exact riemann solver kernel.
+ */
 
 #ifndef EXACT_CUDA_H
 #define EXACT_CUDA_H
 
 #include "../global/global.h"
 
-
-/*! \fn Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields)
- *  \brief Exact Riemann solver based on the Fortran code given in Sec. 4.9 of Toro (1999). */
-__global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields);
+/*! \fn Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real
+ * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int
+ * n_fields) \brief Exact Riemann solver based on the Fortran code given in
+ * Sec. 4.9 of Toro (1999). */
+__global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny,
+                                            int nz, int n_ghost, Real gamma, int dir, int n_fields);
 
 __device__ Real guessp_CUDA(Real dl, Real vxl, Real pl, Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma);
 
 __device__ void prefun_CUDA(Real *f, Real *fd, Real p, Real dk, Real pk, Real ck, Real gamma);
 
-__device__ void starpv_CUDA(Real *p, Real *v, Real dl, Real vxl, Real pl, Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma);
-
-__device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, Real *p,
-                       Real dl, Real vxl, Real pl, Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma);
+__device__ void starpv_CUDA(Real *p, Real *v, Real dl, Real vxl, Real pl, Real cl, Real dr, Real vxr, Real pr, Real cr,
+                            Real gamma);
 
+__device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, Real *p, Real dl, Real vxl, Real pl,
+                            Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma);
 
-#endif //EXACT_CUDA_H
-#endif //CUDA
+#endif  // EXACT_CUDA_H
diff --git a/src/riemann_solvers/hll_cuda.cu b/src/riemann_solvers/hll_cuda.cu
index a69cf9d0f..2987771b2 100644
--- a/src/riemann_solvers/hll_cuda.cu
+++ b/src/riemann_solvers/hll_cuda.cu
@@ -1,31 +1,32 @@
 /*! \file hllc_cuda.cu
  *  \brief Function definitions for the cuda HLLC Riemann solver.*/
 
-#ifdef CUDA
-
-#include "../utils/gpu.hpp"
 #include <math.h>
+
 #include "../global/global.h"
 #include "../global/global_cuda.h"
 #include "../riemann_solvers/hll_cuda.h"
+#include "../utils/gpu.hpp"
 
-#ifdef DE //PRESSURE_DE
-#include "../utils/hydro_utilities.h"
+#ifdef DE  // PRESSURE_DE
+  #include "../utils/hydro_utilities.h"
 #endif
 
-
-/*! \fn Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields)
- *  \brief HLLC Riemann solver based on the version described in Toro (2006), Sec. 10.4. */
-__global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields)
+/*! \fn Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real
+ * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int
+ * n_fields) \brief HLLC Riemann solver based on the version described in Toro
+ * (2006), Sec. 10.4. */
+__global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny,
+                                          int nz, int n_ghost, Real gamma, int dir, int n_fields)
 {
   // get a thread index
-  int blockId = blockIdx.x + blockIdx.y*gridDim.x;
-  int tid = threadIdx.x + blockId * blockDim.x;
-  int zid = tid / (nx*ny);
-  int yid = (tid - zid*nx*ny) / nx;
-  int xid = tid - zid*nx*ny - yid*nx;
+  int blockId = blockIdx.x + blockIdx.y * gridDim.x;
+  int tid     = threadIdx.x + blockId * blockDim.x;
+  int zid     = tid / (nx * ny);
+  int yid     = (tid - zid * nx * ny) / nx;
+  int xid     = tid - zid * nx * ny - yid * nx;
 
-  int n_cells = nx*ny*nz;
+  int n_cells = nx * ny * nz;
 
   Real dl, vxl, mxl, vyl, myl, vzl, mzl, pl, El;
   Real dr, vxr, mxr, vyr, myr, vzr, mzr, pr, Er;
@@ -40,70 +41,75 @@ __global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R
   // Real dls, drs, mxls, mxrs, myls, myrs, mzls, mzrs, Els, Ers;
   Real f_d, f_mx, f_my, f_mz, f_E;
   Real Sl, Sr, cfl, cfr;
-  #ifdef DE
+#ifdef DE
   Real dgel, dger, f_ge_l, f_ge_r, f_ge, E_kin;
-  #endif
-  #ifdef SCALAR
+#endif
+#ifdef SCALAR
   Real dscl[NSCALARS], dscr[NSCALARS], f_sc_l[NSCALARS], f_sc_r[NSCALARS], f_sc[NSCALARS];
-  #endif
+#endif
 
   // Real etah = 0;
 
   int o1, o2, o3;
-  if (dir==0) {
-    o1 = 1; o2 = 2; o3 = 3;
+  if (dir == 0) {
+    o1 = 1;
+    o2 = 2;
+    o3 = 3;
   }
-  if (dir==1) {
-    o1 = 2; o2 = 3; o3 = 1;
+  if (dir == 1) {
+    o1 = 2;
+    o2 = 3;
+    o3 = 1;
   }
-  if (dir==2) {
-    o1 = 3; o2 = 1; o3 = 2;
+  if (dir == 2) {
+    o1 = 3;
+    o2 = 1;
+    o3 = 2;
   }
 
   // Each thread executes the solver independently
-  //if (xid > n_ghost-3 && xid < nx-n_ghost+1 && yid < ny && zid < nz)
-  if (xid < nx && yid < ny && zid < nz)
-  {
+  // if (xid > n_ghost-3 && xid < nx-n_ghost+1 && yid < ny && zid < nz)
+  if (xid < nx && yid < ny && zid < nz) {
     // retrieve conserved variables
-    dl  = dev_bounds_L[             tid];
-    mxl = dev_bounds_L[o1*n_cells + tid];
-    myl = dev_bounds_L[o2*n_cells + tid];
-    mzl = dev_bounds_L[o3*n_cells + tid];
-    El  = dev_bounds_L[4*n_cells + tid];
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dscl[i] = dev_bounds_L[(5+i)*n_cells + tid];
+    dl  = dev_bounds_L[tid];
+    mxl = dev_bounds_L[o1 * n_cells + tid];
+    myl = dev_bounds_L[o2 * n_cells + tid];
+    mzl = dev_bounds_L[o3 * n_cells + tid];
+    El  = dev_bounds_L[4 * n_cells + tid];
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dscl[i] = dev_bounds_L[(5 + i) * n_cells + tid];
     }
-    #endif
-    #ifdef DE
-    dgel = dev_bounds_L[(n_fields-1)*n_cells + tid];
-    #endif
+#endif
+#ifdef DE
+    dgel = dev_bounds_L[(n_fields - 1) * n_cells + tid];
+#endif
 
-    dr  = dev_bounds_R[            tid];
-    mxr = dev_bounds_R[o1*n_cells + tid];
-    myr = dev_bounds_R[o2*n_cells + tid];
-    mzr = dev_bounds_R[o3*n_cells + tid];
-    Er  = dev_bounds_R[4*n_cells + tid];
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dscr[i] = dev_bounds_R[(5+i)*n_cells + tid];
+    dr  = dev_bounds_R[tid];
+    mxr = dev_bounds_R[o1 * n_cells + tid];
+    myr = dev_bounds_R[o2 * n_cells + tid];
+    mzr = dev_bounds_R[o3 * n_cells + tid];
+    Er  = dev_bounds_R[4 * n_cells + tid];
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dscr[i] = dev_bounds_R[(5 + i) * n_cells + tid];
     }
-    #endif
-    #ifdef DE
-    dger = dev_bounds_R[(n_fields-1)*n_cells + tid];
-    #endif
+#endif
+#ifdef DE
+    dger = dev_bounds_R[(n_fields - 1) * n_cells + tid];
+#endif
 
     // calculate primitive variables
     vxl = mxl / dl;
     vyl = myl / dl;
     vzl = mzl / dl;
-    #ifdef DE //PRESSURE_DE
-    E_kin = 0.5 * dl * ( vxl*vxl + vyl*vyl + vzl*vzl );
-    pl = hydro_utilities::Get_Pressure_From_DE( El, El - E_kin, dgel, gamma );
-    #else
-    pl  = (El - 0.5*dl*(vxl*vxl + vyl*vyl + vzl*vzl)) * (gamma - 1.0);
-    #endif//DE
-    pl  = fmax(pl, (Real) TINY_NUMBER);
+#ifdef DE  // PRESSURE_DE
+    E_kin = 0.5 * dl * (vxl * vxl + vyl * vyl + vzl * vzl);
+    pl    = hydro_utilities::Get_Pressure_From_DE(El, El - E_kin, dgel, gamma);
+#else
+    pl = (El - 0.5 * dl * (vxl * vxl + vyl * vyl + vzl * vzl)) * (gamma - 1.0);
+#endif  // DE
+    pl = fmax(pl, (Real)TINY_NUMBER);
     // #ifdef SCALAR
     // for (int i=0; i<NSCALARS; i++) {
     //   scl[i] = dscl[i] / dl;
@@ -115,13 +121,13 @@ __global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R
     vxr = mxr / dr;
     vyr = myr / dr;
     vzr = mzr / dr;
-    #ifdef DE //PRESSURE_DE
-    E_kin = 0.5 * dr * ( vxr*vxr + vyr*vyr + vzr*vzr );
-    pr = hydro_utilities::Get_Pressure_From_DE( Er, Er - E_kin, dger, gamma );
-    #else
-    pr  = (Er - 0.5*dr*(vxr*vxr + vyr*vyr + vzr*vzr)) * (gamma - 1.0);
-    #endif//DE
-    pr  = fmax(pr, (Real) TINY_NUMBER);
+#ifdef DE  // PRESSURE_DE
+    E_kin = 0.5 * dr * (vxr * vxr + vyr * vyr + vzr * vzr);
+    pr    = hydro_utilities::Get_Pressure_From_DE(Er, Er - E_kin, dger, gamma);
+#else
+    pr = (Er - 0.5 * dr * (vxr * vxr + vyr * vyr + vzr * vzr)) * (gamma - 1.0);
+#endif  // DE
+    pr = fmax(pr, (Real)TINY_NUMBER);
     // #ifdef SCALAR
     // for (int i=0; i<NSCALARS; i++) {
     //   scr[i] = dscr[i] / dr;
@@ -149,19 +155,16 @@ __global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R
     // asq = g1*(H - 0.5*vsq);
     // a = sqrt(asq);
 
-    // calculate the averaged eigenvectors of the Roe matrix (Stone Eqn B2, Toro 11.107)
-    // lambda_m = vx - a;
-    // lambda_p = vx + a;
-
+    // calculate the averaged eigenvectors of the Roe matrix (Stone Eqn B2,
+    // Toro 11.107) lambda_m = vx - a; lambda_p = vx + a;
 
     // compute max and min wave speeds
-    cfl = sqrt(gamma*pl/dl);  // sound speed in left state
-    cfr = sqrt(gamma*pr/dr);  // sound speed in right state
+    cfl = sqrt(gamma * pl / dl);  // sound speed in left state
+    cfr = sqrt(gamma * pr / dr);  // sound speed in right state
 
-    // for signal speeds, take max/min of Roe eigenvalues and left and right sound speeds
-    // Batten eqn. 48
-    // Sl = fmin(lambda_m, vxl - cfl);
-    // Sr = fmax(lambda_p, vxr + cfr);
+    // for signal speeds, take max/min of Roe eigenvalues and left and right
+    // sound speeds Batten eqn. 48 Sl = fmin(lambda_m, vxl - cfl); Sr =
+    // fmax(lambda_p, vxr + cfr);
 
     // if the H-correction is turned on, add cross-flux dissipation
     // Sl = sgn_CUDA(Sl)*fmax(fabs(Sl), etah);
@@ -169,107 +172,97 @@ __global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R
     Sl = fmin(vxr - cfr, vxl - cfl);
     Sr = fmax(vxl + cfl, vxr + cfr);
 
-
     // left and right fluxes
     f_d_l  = mxl;
-    f_mx_l = mxl*vxl + pl;
-    f_my_l = myl*vxl;
-    f_mz_l = mzl*vxl;
-    f_E_l  = (El + pl)*vxl;
-    #ifdef DE
-    f_ge_l = dgel*vxl;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      f_sc_l[i] = dscl[i]*vxl;
+    f_mx_l = mxl * vxl + pl;
+    f_my_l = myl * vxl;
+    f_mz_l = mzl * vxl;
+    f_E_l  = (El + pl) * vxl;
+#ifdef DE
+    f_ge_l = dgel * vxl;
+#endif
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      f_sc_l[i] = dscl[i] * vxl;
     }
-    #endif
+#endif
 
     f_d_r  = mxr;
-    f_mx_r = mxr*vxr + pr;
-    f_my_r = myr*vxr;
-    f_mz_r = mzr*vxr;
-    f_E_r  = (Er + pr)*vxr;
-    #ifdef DE
-    f_ge_r = dger*vxr;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      f_sc_r[i] = dscr[i]*vxr;
+    f_mx_r = mxr * vxr + pr;
+    f_my_r = myr * vxr;
+    f_mz_r = mzr * vxr;
+    f_E_r  = (Er + pr) * vxr;
+#ifdef DE
+    f_ge_r = dger * vxr;
+#endif
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      f_sc_r[i] = dscr[i] * vxr;
     }
-    #endif
+#endif
 
     // return upwind flux if flow is supersonic
     if (Sl > 0.0) {
-      dev_flux[           tid] = f_d_l;
-      dev_flux[o1*n_cells+tid] = f_mx_l;
-      dev_flux[o2*n_cells+tid] = f_my_l;
-      dev_flux[o3*n_cells+tid] = f_mz_l;
-      dev_flux[4*n_cells+tid]  = f_E_l;
-      #ifdef SCALAR
-      for (int i=0; i<NSCALARS; i++) {
-        dev_flux[(5+i)*n_cells+tid]  = f_sc_l[i];
+      dev_flux[tid]                = f_d_l;
+      dev_flux[o1 * n_cells + tid] = f_mx_l;
+      dev_flux[o2 * n_cells + tid] = f_my_l;
+      dev_flux[o3 * n_cells + tid] = f_mz_l;
+      dev_flux[4 * n_cells + tid]  = f_E_l;
+#ifdef SCALAR
+      for (int i = 0; i < NSCALARS; i++) {
+        dev_flux[(5 + i) * n_cells + tid] = f_sc_l[i];
       }
-      #endif
-      #ifdef DE
-      dev_flux[(n_fields-1)*n_cells+tid]  = f_ge_l;
-      #endif
+#endif
+#ifdef DE
+      dev_flux[(n_fields - 1) * n_cells + tid] = f_ge_l;
+#endif
       return;
-    }
-    else if (Sr < 0.0) {
-      dev_flux[           tid] = f_d_r;
-      dev_flux[o1*n_cells+tid] = f_mx_r;
-      dev_flux[o2*n_cells+tid] = f_my_r;
-      dev_flux[o3*n_cells+tid] = f_mz_r;
-      dev_flux[4*n_cells+tid]  = f_E_r;
-      #ifdef SCALAR
-      for (int i=0; i<NSCALARS; i++) {
-        dev_flux[(5+i)*n_cells+tid]  = f_sc_r[i];
+    } else if (Sr < 0.0) {
+      dev_flux[tid]                = f_d_r;
+      dev_flux[o1 * n_cells + tid] = f_mx_r;
+      dev_flux[o2 * n_cells + tid] = f_my_r;
+      dev_flux[o3 * n_cells + tid] = f_mz_r;
+      dev_flux[4 * n_cells + tid]  = f_E_r;
+#ifdef SCALAR
+      for (int i = 0; i < NSCALARS; i++) {
+        dev_flux[(5 + i) * n_cells + tid] = f_sc_r[i];
       }
-      #endif
-      #ifdef DE
-      dev_flux[(n_fields-1)*n_cells+tid]  = f_ge_r;
-      #endif
+#endif
+#ifdef DE
+      dev_flux[(n_fields - 1) * n_cells + tid] = f_ge_r;
+#endif
       return;
     }
     // otherwise compute subsonic flux
     else {
-
-      f_d = ( ( Sr * f_d_l ) - ( Sl * f_d_r ) + Sl*Sr*( dr - dl ) ) / ( Sr - Sl );
-      f_mx = ( ( Sr * f_mx_l ) - ( Sl * f_mx_r ) + Sl*Sr*( mxr - mxl ) ) / ( Sr - Sl );
-      f_my = ( ( Sr * f_my_l ) - ( Sl * f_my_r ) + Sl*Sr*( myr - myl ) ) / ( Sr - Sl );
-      f_mz = ( ( Sr * f_mz_l ) - ( Sl * f_mz_r ) + Sl*Sr*( mzr - mzl ) ) / ( Sr - Sl );
-      f_E = ( ( Sr * f_E_l ) - ( Sl * f_E_r ) + Sl*Sr*( Er - El ) ) / ( Sr - Sl );
-      #ifdef DE
-      f_ge = ( ( Sr * f_ge_l ) - ( Sl * f_ge_r ) + Sl*Sr*( dger - dgel ) ) / ( Sr - Sl );
-      #endif
-      #ifdef SCALAR
-      for (int i=0; i<NSCALARS; i++) {
-      f_sc[i] = ( ( Sr * f_sc_l[i] ) - ( Sl * f_sc_r[i] ) + Sl*Sr*( dscr[i] - dscl[i] ) ) / ( Sr - Sl );
+      f_d  = ((Sr * f_d_l) - (Sl * f_d_r) + Sl * Sr * (dr - dl)) / (Sr - Sl);
+      f_mx = ((Sr * f_mx_l) - (Sl * f_mx_r) + Sl * Sr * (mxr - mxl)) / (Sr - Sl);
+      f_my = ((Sr * f_my_l) - (Sl * f_my_r) + Sl * Sr * (myr - myl)) / (Sr - Sl);
+      f_mz = ((Sr * f_mz_l) - (Sl * f_mz_r) + Sl * Sr * (mzr - mzl)) / (Sr - Sl);
+      f_E  = ((Sr * f_E_l) - (Sl * f_E_r) + Sl * Sr * (Er - El)) / (Sr - Sl);
+#ifdef DE
+      f_ge = ((Sr * f_ge_l) - (Sl * f_ge_r) + Sl * Sr * (dger - dgel)) / (Sr - Sl);
+#endif
+#ifdef SCALAR
+      for (int i = 0; i < NSCALARS; i++) {
+        f_sc[i] = ((Sr * f_sc_l[i]) - (Sl * f_sc_r[i]) + Sl * Sr * (dscr[i] - dscl[i])) / (Sr - Sl);
       }
-      #endif
-
-
+#endif
 
       // return the hllc fluxes
-      dev_flux[           tid] = f_d;
-      dev_flux[o1*n_cells+tid] = f_mx;
-      dev_flux[o2*n_cells+tid] = f_my;
-      dev_flux[o3*n_cells+tid] = f_mz;
-      dev_flux[4*n_cells+tid]  = f_E;
-      #ifdef SCALAR
-      for (int i=0; i<NSCALARS; i++) {
-        dev_flux[(5+i)*n_cells+tid]  = f_sc[i];
+      dev_flux[tid]                = f_d;
+      dev_flux[o1 * n_cells + tid] = f_mx;
+      dev_flux[o2 * n_cells + tid] = f_my;
+      dev_flux[o3 * n_cells + tid] = f_mz;
+      dev_flux[4 * n_cells + tid]  = f_E;
+#ifdef SCALAR
+      for (int i = 0; i < NSCALARS; i++) {
+        dev_flux[(5 + i) * n_cells + tid] = f_sc[i];
       }
-      #endif
-      #ifdef DE
-      dev_flux[(n_fields-1)*n_cells+tid]  = f_ge;
-      #endif
-
+#endif
+#ifdef DE
+      dev_flux[(n_fields - 1) * n_cells + tid] = f_ge;
+#endif
     }
   }
-
 }
-
-
-#endif //CUDA
diff --git a/src/riemann_solvers/hll_cuda.h b/src/riemann_solvers/hll_cuda.h
index b9a842657..43dc18cbe 100644
--- a/src/riemann_solvers/hll_cuda.h
+++ b/src/riemann_solvers/hll_cuda.h
@@ -1,19 +1,16 @@
 /*! \file hllc_cuda.h
  *  \brief Declarations of functions for the cuda hllc riemann solver kernel. */
 
-#ifdef CUDA
-
 #ifndef HLL_CUDA_H
 #define HLL_CUDA_H
 
 #include "../global/global.h"
 
+/*! \fn Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real
+ * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int
+ * n_fields) \brief Roe Riemann solver based on the version described in Stone
+ * et al, 2008. */
+__global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny,
+                                          int nz, int n_ghost, Real gamma, int dir, int n_fields);
 
-/*! \fn Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields)
- *  \brief Roe Riemann solver based on the version described in Stone et al, 2008. */
-__global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields);
-
-
-
-#endif //HLLC_CUDA_H
-#endif //CUDA
+#endif  // HLLC_CUDA_H
diff --git a/src/riemann_solvers/hllc_cuda.cu b/src/riemann_solvers/hllc_cuda.cu
index 8a765bb8f..c923edf47 100644
--- a/src/riemann_solvers/hllc_cuda.cu
+++ b/src/riemann_solvers/hllc_cuda.cu
@@ -1,31 +1,32 @@
 /*! \file hllc_cuda.cu
  *  \brief Function definitions for the cuda HLLC Riemann solver.*/
 
-#ifdef CUDA
-
-#include "../utils/gpu.hpp"
 #include <math.h>
+
 #include "../global/global.h"
 #include "../global/global_cuda.h"
 #include "../riemann_solvers/hllc_cuda.h"
+#include "../utils/gpu.hpp"
 
-#ifdef DE //PRESSURE_DE
-#include "../utils/hydro_utilities.h"
+#ifdef DE  // PRESSURE_DE
+  #include "../utils/hydro_utilities.h"
 #endif
 
-
-/*! \fn Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields)
- *  \brief HLLC Riemann solver based on the version described in Toro (2006), Sec. 10.4. */
-__global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields)
+/*! \fn Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real
+ * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int
+ * n_fields) \brief HLLC Riemann solver based on the version described in Toro
+ * (2006), Sec. 10.4. */
+__global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny,
+                                           int nz, int n_ghost, Real gamma, int dir, int n_fields)
 {
   // get a thread index
-  int blockId = blockIdx.x + blockIdx.y*gridDim.x;
-  int tid = threadIdx.x + blockId * blockDim.x;
-  int zid = tid / (nx*ny);
-  int yid = (tid - zid*nx*ny) / nx;
-  int xid = tid - zid*nx*ny - yid*nx;
+  int blockId = blockIdx.x + blockIdx.y * gridDim.x;
+  int tid     = threadIdx.x + blockId * blockDim.x;
+  int zid     = tid / (nx * ny);
+  int yid     = (tid - zid * nx * ny) / nx;
+  int xid     = tid - zid * nx * ny - yid * nx;
 
-  int n_cells = nx*ny*nz;
+  int n_cells = nx * ny * nz;
 
   Real dl, vxl, mxl, vyl, myl, vzl, mzl, pl, El;
   Real dr, vxr, mxr, vyr, myr, vzr, mzr, pr, Er;
@@ -40,96 +41,102 @@ __global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_
   Real dls, drs, mxls, mxrs, myls, myrs, mzls, mzrs, Els, Ers;
   Real f_d, f_mx, f_my, f_mz, f_E;
   Real Sl, Sr, Sm, cfl, cfr, ps;
-  #ifdef DE
+#ifdef DE
   Real dgel, dger, gel, ger, gels, gers, f_ge_l, f_ge_r, f_ge, E_kin;
-  #endif
-  #ifdef SCALAR
-  Real dscl[NSCALARS], dscr[NSCALARS], scl[NSCALARS], scr[NSCALARS], scls[NSCALARS], scrs[NSCALARS], f_sc_l[NSCALARS], f_sc_r[NSCALARS], f_sc[NSCALARS];
-  #endif
+#endif
+#ifdef SCALAR
+  Real dscl[NSCALARS], dscr[NSCALARS], scl[NSCALARS], scr[NSCALARS], scls[NSCALARS], scrs[NSCALARS], f_sc_l[NSCALARS],
+      f_sc_r[NSCALARS], f_sc[NSCALARS];
+#endif
 
   Real etah = 0;
 
   int o1, o2, o3;
-  if (dir==0) {
-    o1 = 1; o2 = 2; o3 = 3;
+  if (dir == 0) {
+    o1 = 1;
+    o2 = 2;
+    o3 = 3;
   }
-  if (dir==1) {
-    o1 = 2; o2 = 3; o3 = 1;
+  if (dir == 1) {
+    o1 = 2;
+    o2 = 3;
+    o3 = 1;
   }
-  if (dir==2) {
-    o1 = 3; o2 = 1; o3 = 2;
+  if (dir == 2) {
+    o1 = 3;
+    o2 = 1;
+    o3 = 2;
   }
 
   // Each thread executes the solver independently
-  //if (xid > n_ghost-3 && xid < nx-n_ghost+1 && yid < ny && zid < nz)
-  if (xid < nx && yid < ny && zid < nz)
-  {
+  // if (xid > n_ghost-3 && xid < nx-n_ghost+1 && yid < ny && zid < nz)
+  if (xid < nx && yid < ny && zid < nz) {
     // retrieve conserved variables
-    dl  = dev_bounds_L[             tid];
-    mxl = dev_bounds_L[o1*n_cells + tid];
-    myl = dev_bounds_L[o2*n_cells + tid];
-    mzl = dev_bounds_L[o3*n_cells + tid];
-    El  = dev_bounds_L[4*n_cells + tid];
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dscl[i] = dev_bounds_L[(5+i)*n_cells + tid];
+    dl  = dev_bounds_L[tid];
+    mxl = dev_bounds_L[o1 * n_cells + tid];
+    myl = dev_bounds_L[o2 * n_cells + tid];
+    mzl = dev_bounds_L[o3 * n_cells + tid];
+    El  = dev_bounds_L[4 * n_cells + tid];
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dscl[i] = dev_bounds_L[(5 + i) * n_cells + tid];
     }
-    #endif
-    #ifdef DE
-    dgel = dev_bounds_L[(n_fields-1)*n_cells + tid];
-    #endif
+#endif
+#ifdef DE
+    dgel = dev_bounds_L[(n_fields - 1) * n_cells + tid];
+#endif
 
-    dr  = dev_bounds_R[            tid];
-    mxr = dev_bounds_R[o1*n_cells + tid];
-    myr = dev_bounds_R[o2*n_cells + tid];
-    mzr = dev_bounds_R[o3*n_cells + tid];
-    Er  = dev_bounds_R[4*n_cells + tid];
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dscr[i] = dev_bounds_R[(5+i)*n_cells + tid];
+    dr  = dev_bounds_R[tid];
+    mxr = dev_bounds_R[o1 * n_cells + tid];
+    myr = dev_bounds_R[o2 * n_cells + tid];
+    mzr = dev_bounds_R[o3 * n_cells + tid];
+    Er  = dev_bounds_R[4 * n_cells + tid];
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dscr[i] = dev_bounds_R[(5 + i) * n_cells + tid];
     }
-    #endif
-    #ifdef DE
-    dger = dev_bounds_R[(n_fields-1)*n_cells + tid];
-    #endif
+#endif
+#ifdef DE
+    dger = dev_bounds_R[(n_fields - 1) * n_cells + tid];
+#endif
 
     // calculate primitive variables
     vxl = mxl / dl;
     vyl = myl / dl;
     vzl = mzl / dl;
-    #ifdef DE //PRESSURE_DE
-    E_kin = 0.5 * dl * ( vxl*vxl + vyl*vyl + vzl*vzl );
-    pl = hydro_utilities::Get_Pressure_From_DE( El, El - E_kin, dgel, gamma );
-    #else
-    pl  = (El - 0.5*dl*(vxl*vxl + vyl*vyl + vzl*vzl)) * (gamma - 1.0);
-    #endif //PRESSURE_DE
-    pl  = fmax(pl, (Real) TINY_NUMBER);
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
+#ifdef DE  // PRESSURE_DE
+    E_kin = 0.5 * dl * (vxl * vxl + vyl * vyl + vzl * vzl);
+    pl    = hydro_utilities::Get_Pressure_From_DE(El, El - E_kin, dgel, gamma);
+#else
+    pl = (El - 0.5 * dl * (vxl * vxl + vyl * vyl + vzl * vzl)) * (gamma - 1.0);
+#endif  // PRESSURE_DE
+    pl = fmax(pl, (Real)TINY_NUMBER);
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
       scl[i] = dscl[i] / dl;
     }
-    #endif
-    #ifdef DE
+#endif
+#ifdef DE
     gel = dgel / dl;
-    #endif
+#endif
     vxr = mxr / dr;
     vyr = myr / dr;
     vzr = mzr / dr;
-    #ifdef DE //PRESSURE_DE
-    E_kin = 0.5 * dr * ( vxr*vxr + vyr*vyr + vzr*vzr );
-    pr = hydro_utilities::Get_Pressure_From_DE( Er, Er - E_kin, dger, gamma );
-    #else
-    pr  = (Er - 0.5*dr*(vxr*vxr + vyr*vyr + vzr*vzr)) * (gamma - 1.0);
-    #endif //PRESSURE_DE
-    pr  = fmax(pr, (Real) TINY_NUMBER);
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
+#ifdef DE  // PRESSURE_DE
+    E_kin = 0.5 * dr * (vxr * vxr + vyr * vyr + vzr * vzr);
+    pr    = hydro_utilities::Get_Pressure_From_DE(Er, Er - E_kin, dger, gamma);
+#else
+    pr = (Er - 0.5 * dr * (vxr * vxr + vyr * vyr + vzr * vzr)) * (gamma - 1.0);
+#endif  // PRESSURE_DE
+    pr = fmax(pr, (Real)TINY_NUMBER);
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
       scr[i] = dscr[i] / dr;
     }
-    #endif
-    #ifdef DE
+#endif
+#ifdef DE
     ger = dger / dr;
-    #endif
+#endif
 
     // calculate the enthalpy in each cell
     Hl = (El + pl) / dl;
@@ -139,170 +146,162 @@ __global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_
     // (see Stone et al., 2008, Eqn 65, or Toro 2009, 11.118)
     sqrtdl = sqrt(dl);
     sqrtdr = sqrt(dr);
-    vx = (sqrtdl*vxl + sqrtdr*vxr) / (sqrtdl + sqrtdr);
-    vy = (sqrtdl*vyl + sqrtdr*vyr) / (sqrtdl + sqrtdr);
-    vz = (sqrtdl*vzl + sqrtdr*vzr) / (sqrtdl + sqrtdr);
-    H  = (sqrtdl*Hl  + sqrtdr*Hr)  / (sqrtdl + sqrtdr);
+    vx     = (sqrtdl * vxl + sqrtdr * vxr) / (sqrtdl + sqrtdr);
+    vy     = (sqrtdl * vyl + sqrtdr * vyr) / (sqrtdl + sqrtdr);
+    vz     = (sqrtdl * vzl + sqrtdr * vzr) / (sqrtdl + sqrtdr);
+    H      = (sqrtdl * Hl + sqrtdr * Hr) / (sqrtdl + sqrtdr);
 
     // calculate the sound speed squared (Stone B2)
-    vsq = (vx*vx + vy*vy + vz*vz);
-    asq = g1*(H - 0.5*vsq);
-    a = sqrt(asq);
+    vsq = (vx * vx + vy * vy + vz * vz);
+    asq = g1 * (H - 0.5 * vsq);
+    a   = sqrt(asq);
 
-    // calculate the averaged eigenvectors of the Roe matrix (Stone Eqn B2, Toro 11.107)
+    // calculate the averaged eigenvectors of the Roe matrix (Stone Eqn B2,
+    // Toro 11.107)
     lambda_m = vx - a;
     lambda_p = vx + a;
 
-
     // compute max and min wave speeds
-    cfl = sqrt(gamma*pl/dl);  // sound speed in left state
-    cfr = sqrt(gamma*pr/dr);  // sound speed in right state
+    cfl = sqrt(gamma * pl / dl);  // sound speed in left state
+    cfr = sqrt(gamma * pr / dr);  // sound speed in right state
 
-    // for signal speeds, take max/min of Roe eigenvalues and left and right sound speeds
-    // Batten eqn. 48
+    // for signal speeds, take max/min of Roe eigenvalues and left and right
+    // sound speeds Batten eqn. 48
     Sl = fmin(lambda_m, vxl - cfl);
     Sr = fmax(lambda_p, vxr + cfr);
 
     // if the H-correction is turned on, add cross-flux dissipation
-    Sl = sgn_CUDA(Sl)*fmax(fabs(Sl), etah);
-    Sr = sgn_CUDA(Sr)*fmax(fabs(Sr), etah);
-
+    Sl = sgn_CUDA(Sl) * fmax(fabs(Sl), etah);
+    Sr = sgn_CUDA(Sr) * fmax(fabs(Sr), etah);
 
     // left and right fluxes
     f_d_l  = mxl;
-    f_mx_l = mxl*vxl + pl;
-    f_my_l = myl*vxl;
-    f_mz_l = mzl*vxl;
-    f_E_l  = (El + pl)*vxl;
-    #ifdef DE
-    f_ge_l = dgel*vxl;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      f_sc_l[i] = dscl[i]*vxl;
+    f_mx_l = mxl * vxl + pl;
+    f_my_l = myl * vxl;
+    f_mz_l = mzl * vxl;
+    f_E_l  = (El + pl) * vxl;
+#ifdef DE
+    f_ge_l = dgel * vxl;
+#endif
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      f_sc_l[i] = dscl[i] * vxl;
     }
-    #endif
+#endif
 
     f_d_r  = mxr;
-    f_mx_r = mxr*vxr + pr;
-    f_my_r = myr*vxr;
-    f_mz_r = mzr*vxr;
-    f_E_r  = (Er + pr)*vxr;
-    #ifdef DE
-    f_ge_r = dger*vxr;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      f_sc_r[i] = dscr[i]*vxr;
+    f_mx_r = mxr * vxr + pr;
+    f_my_r = myr * vxr;
+    f_mz_r = mzr * vxr;
+    f_E_r  = (Er + pr) * vxr;
+#ifdef DE
+    f_ge_r = dger * vxr;
+#endif
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      f_sc_r[i] = dscr[i] * vxr;
     }
-    #endif
+#endif
 
     // return upwind flux if flow is supersonic
     if (Sl > 0.0) {
-      dev_flux[           tid] = f_d_l;
-      dev_flux[o1*n_cells+tid] = f_mx_l;
-      dev_flux[o2*n_cells+tid] = f_my_l;
-      dev_flux[o3*n_cells+tid] = f_mz_l;
-      dev_flux[4*n_cells+tid]  = f_E_l;
-      #ifdef SCALAR
-      for (int i=0; i<NSCALARS; i++) {
-        dev_flux[(5+i)*n_cells+tid]  = f_sc_l[i];
+      dev_flux[tid]                = f_d_l;
+      dev_flux[o1 * n_cells + tid] = f_mx_l;
+      dev_flux[o2 * n_cells + tid] = f_my_l;
+      dev_flux[o3 * n_cells + tid] = f_mz_l;
+      dev_flux[4 * n_cells + tid]  = f_E_l;
+#ifdef SCALAR
+      for (int i = 0; i < NSCALARS; i++) {
+        dev_flux[(5 + i) * n_cells + tid] = f_sc_l[i];
       }
-      #endif
-      #ifdef DE
-      dev_flux[(n_fields-1)*n_cells+tid]  = f_ge_l;
-      #endif
+#endif
+#ifdef DE
+      dev_flux[(n_fields - 1) * n_cells + tid] = f_ge_l;
+#endif
       return;
-    }
-    else if (Sr < 0.0) {
-      dev_flux[           tid] = f_d_r;
-      dev_flux[o1*n_cells+tid] = f_mx_r;
-      dev_flux[o2*n_cells+tid] = f_my_r;
-      dev_flux[o3*n_cells+tid] = f_mz_r;
-      dev_flux[4*n_cells+tid]  = f_E_r;
-      #ifdef SCALAR
-      for (int i=0; i<NSCALARS; i++) {
-        dev_flux[(5+i)*n_cells+tid]  = f_sc_r[i];
+    } else if (Sr < 0.0) {
+      dev_flux[tid]                = f_d_r;
+      dev_flux[o1 * n_cells + tid] = f_mx_r;
+      dev_flux[o2 * n_cells + tid] = f_my_r;
+      dev_flux[o3 * n_cells + tid] = f_mz_r;
+      dev_flux[4 * n_cells + tid]  = f_E_r;
+#ifdef SCALAR
+      for (int i = 0; i < NSCALARS; i++) {
+        dev_flux[(5 + i) * n_cells + tid] = f_sc_r[i];
       }
-      #endif
-      #ifdef DE
-      dev_flux[(n_fields-1)*n_cells+tid]  = f_ge_r;
-      #endif
+#endif
+#ifdef DE
+      dev_flux[(n_fields - 1) * n_cells + tid] = f_ge_r;
+#endif
       return;
     }
     // otherwise compute subsonic flux
     else {
-
-      // compute contact wave speed and pressure in star region (Batten eqns 34 & 36)
-      Sm = (dr*vxr*(Sr - vxr) - dl*vxl*(Sl - vxl) + pl - pr) / (dr*(Sr - vxr) - dl*(Sl - vxl));
-      ps = dl*(vxl - Sl)*(vxl - Sm) + pl;
+      // compute contact wave speed and pressure in star region (Batten eqns 34
+      // & 36)
+      Sm = (dr * vxr * (Sr - vxr) - dl * vxl * (Sl - vxl) + pl - pr) / (dr * (Sr - vxr) - dl * (Sl - vxl));
+      ps = dl * (vxl - Sl) * (vxl - Sm) + pl;
 
       // conserved variables in the left star state (Batten eqns 35 - 40)
-      dls = dl * (Sl - vxl) / (Sl - Sm);
-      mxls = (mxl*(Sl - vxl) + ps - pl) / (Sl - Sm);
-      myls = dls*vyl;
-      mzls = dls*vzl;
-      Els = (El*(Sl - vxl) - pl*vxl + ps*Sm) / (Sl - Sm);
-      #ifdef DE
-      gels = dls*gel;
-      #endif
-      #ifdef SCALAR
-      for (int i=0; i<NSCALARS; i++) {
-        scls[i] = dls*scl[i];
+      dls  = dl * (Sl - vxl) / (Sl - Sm);
+      mxls = (mxl * (Sl - vxl) + ps - pl) / (Sl - Sm);
+      myls = dls * vyl;
+      mzls = dls * vzl;
+      Els  = (El * (Sl - vxl) - pl * vxl + ps * Sm) / (Sl - Sm);
+#ifdef DE
+      gels = dls * gel;
+#endif
+#ifdef SCALAR
+      for (int i = 0; i < NSCALARS; i++) {
+        scls[i] = dls * scl[i];
       }
-      #endif
+#endif
 
       // conserved variables in the right star state
-      drs = dr * (Sr - vxr) / (Sr - Sm);
-      mxrs = (mxr*(Sr - vxr) + ps - pr) / (Sr - Sm);
-      myrs = drs*vyr;
-      mzrs = drs*vzr;
-      Ers = (Er*(Sr - vxr) - pr*vxr + ps*Sm) / (Sr - Sm);
-      #ifdef DE
-      gers = drs*ger;
-      #endif
-      #ifdef SCALAR
-      for (int i=0; i<NSCALARS; i++) {
-        scrs[i] = drs*scr[i];
+      drs  = dr * (Sr - vxr) / (Sr - Sm);
+      mxrs = (mxr * (Sr - vxr) + ps - pr) / (Sr - Sm);
+      myrs = drs * vyr;
+      mzrs = drs * vzr;
+      Ers  = (Er * (Sr - vxr) - pr * vxr + ps * Sm) / (Sr - Sm);
+#ifdef DE
+      gers = drs * ger;
+#endif
+#ifdef SCALAR
+      for (int i = 0; i < NSCALARS; i++) {
+        scrs[i] = drs * scr[i];
       }
-      #endif
-
+#endif
 
       // compute the hllc flux (Batten eqn 27)
-      f_d  = 0.5*(f_d_l  + f_d_r  + (Sr - fabs(Sm))*drs  + (Sl + fabs(Sm))*dls  - Sl*dl  - Sr*dr);
-      f_mx = 0.5*(f_mx_l + f_mx_r + (Sr - fabs(Sm))*mxrs + (Sl + fabs(Sm))*mxls - Sl*mxl - Sr*mxr);
-      f_my = 0.5*(f_my_l + f_my_r + (Sr - fabs(Sm))*myrs + (Sl + fabs(Sm))*myls - Sl*myl - Sr*myr);
-      f_mz = 0.5*(f_mz_l + f_mz_r + (Sr - fabs(Sm))*mzrs + (Sl + fabs(Sm))*mzls - Sl*mzl - Sr*mzr);
-      f_E  = 0.5*(f_E_l  + f_E_r  + (Sr - fabs(Sm))*Ers  + (Sl + fabs(Sm))*Els  - Sl*El  - Sr*Er);
-      #ifdef DE
-      f_ge = 0.5*(f_ge_l + f_ge_r + (Sr - fabs(Sm))*gers + (Sl + fabs(Sm))*gels - Sl*dgel - Sr*dger);
-      #endif
-      #ifdef SCALAR
-      for (int i=0; i<NSCALARS; i++) {
-        f_sc[i] = 0.5*(f_sc_l[i] + f_sc_r[i] + (Sr - fabs(Sm))*scrs[i] + (Sl + fabs(Sm))*scls[i] - Sl*dscl[i] - Sr*dscr[i]);
+      f_d  = 0.5 * (f_d_l + f_d_r + (Sr - fabs(Sm)) * drs + (Sl + fabs(Sm)) * dls - Sl * dl - Sr * dr);
+      f_mx = 0.5 * (f_mx_l + f_mx_r + (Sr - fabs(Sm)) * mxrs + (Sl + fabs(Sm)) * mxls - Sl * mxl - Sr * mxr);
+      f_my = 0.5 * (f_my_l + f_my_r + (Sr - fabs(Sm)) * myrs + (Sl + fabs(Sm)) * myls - Sl * myl - Sr * myr);
+      f_mz = 0.5 * (f_mz_l + f_mz_r + (Sr - fabs(Sm)) * mzrs + (Sl + fabs(Sm)) * mzls - Sl * mzl - Sr * mzr);
+      f_E  = 0.5 * (f_E_l + f_E_r + (Sr - fabs(Sm)) * Ers + (Sl + fabs(Sm)) * Els - Sl * El - Sr * Er);
+#ifdef DE
+      f_ge = 0.5 * (f_ge_l + f_ge_r + (Sr - fabs(Sm)) * gers + (Sl + fabs(Sm)) * gels - Sl * dgel - Sr * dger);
+#endif
+#ifdef SCALAR
+      for (int i = 0; i < NSCALARS; i++) {
+        f_sc[i] = 0.5 * (f_sc_l[i] + f_sc_r[i] + (Sr - fabs(Sm)) * scrs[i] + (Sl + fabs(Sm)) * scls[i] - Sl * dscl[i] -
+                         Sr * dscr[i]);
       }
-      #endif
-
+#endif
 
       // return the hllc fluxes
-      dev_flux[           tid] = f_d;
-      dev_flux[o1*n_cells+tid] = f_mx;
-      dev_flux[o2*n_cells+tid] = f_my;
-      dev_flux[o3*n_cells+tid] = f_mz;
-      dev_flux[4*n_cells+tid]  = f_E;
-      #ifdef SCALAR
-      for (int i=0; i<NSCALARS; i++) {
-        dev_flux[(5+i)*n_cells+tid]  = f_sc[i];
+      dev_flux[tid]                = f_d;
+      dev_flux[o1 * n_cells + tid] = f_mx;
+      dev_flux[o2 * n_cells + tid] = f_my;
+      dev_flux[o3 * n_cells + tid] = f_mz;
+      dev_flux[4 * n_cells + tid]  = f_E;
+#ifdef SCALAR
+      for (int i = 0; i < NSCALARS; i++) {
+        dev_flux[(5 + i) * n_cells + tid] = f_sc[i];
       }
-      #endif
-      #ifdef DE
-      dev_flux[(n_fields-1)*n_cells+tid]  = f_ge;
-      #endif
-
+#endif
+#ifdef DE
+      dev_flux[(n_fields - 1) * n_cells + tid] = f_ge;
+#endif
     }
   }
-
 }
-
-
-#endif //CUDA
diff --git a/src/riemann_solvers/hllc_cuda.h b/src/riemann_solvers/hllc_cuda.h
index 23539b198..f10c7c43f 100644
--- a/src/riemann_solvers/hllc_cuda.h
+++ b/src/riemann_solvers/hllc_cuda.h
@@ -1,19 +1,16 @@
 /*! \file hllc_cuda.h
  *  \brief Declarations of functions for the cuda hllc riemann solver kernel. */
 
-#ifdef CUDA
-
 #ifndef HLLC_CUDA_H
 #define HLLC_CUDA_H
 
 #include "../global/global.h"
 
+/*! \fn Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real
+ * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int
+ * n_fields) \brief Roe Riemann solver based on the version described in Stone
+ * et al, 2008. */
+__global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny,
+                                           int nz, int n_ghost, Real gamma, int dir, int n_fields);
 
-/*! \fn Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields)
- *  \brief Roe Riemann solver based on the version described in Stone et al, 2008. */
-__global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields);
-
-
-
-#endif //HLLC_CUDA_H
-#endif //CUDA
+#endif  // HLLC_CUDA_H
diff --git a/src/riemann_solvers/hllc_cuda_tests.cu b/src/riemann_solvers/hllc_cuda_tests.cu
index 644668cdd..829c536b7 100644
--- a/src/riemann_solvers/hllc_cuda_tests.cu
+++ b/src/riemann_solvers/hllc_cuda_tests.cu
@@ -1,216 +1,181 @@
 /*!
-* \file hllc_cuda_tests.cpp
-* \author Robert 'Bob' Caddy (rvc@pitt.edu)
-* \brief Test the code units within hllc_cuda.cu
-*
-*/
+ * \file hllc_cuda_tests.cpp
+ * \author Robert 'Bob' Caddy (rvc@pitt.edu)
+ * \brief Test the code units within hllc_cuda.cu
+ *
+ */
 
 // STL Includes
 #include <iostream>
-#include <vector>
 #include <string>
+#include <vector>
 
 // External Includes
-#include <gtest/gtest.h>    // Include GoogleTest and related libraries/headers
+#include <gtest/gtest.h>  // Include GoogleTest and related libraries/headers
 
 // Local Includes
 #include "../global/global_cuda.h"
+#include "../riemann_solvers/hllc_cuda.h"  // Include code to test
 #include "../utils/gpu.hpp"
 #include "../utils/testing_utilities.h"
-#include "../riemann_solvers/hllc_cuda.h"   // Include code to test
 
 #if defined(CUDA) && defined(HLLC)
 
-    // =========================================================================
-    /*!
-     * \brief Test fixture for simple testing of the HLLC Riemann Solver.
-       Effectively takes the left state, right state, fiducial fluxes, and
-       custom user output then performs all the required running and testing
-     *
-     */
-    class tHYDROCalculateHLLCFluxesCUDA : public ::testing::Test
-    {
-    protected:
-        // =====================================================================
-        /*!
-         * \brief Compute and return the HLLC fluxes
-         *
-         * \param[in] leftState The state on the left side in conserved
-         * variables. In order the elements are: density, x-momentum,
-         * y-momentum, z-momentum, and energy.
-         * \param[in] rightState The state on the right side in conserved
-         * variables. In order the elements are: density, x-momentum,
-         * y-momentum, z-momentum, and energy.
-         * \param[in] gamma The adiabatic index
-         * \return std::vector<double>
-         */
-        std::vector<Real> computeFluxes(std::vector<Real> const &stateLeft,
-                                        std::vector<Real> const &stateRight,
-                                        Real const &gamma)
-        {
-            // Simulation Paramters
-            int const nx        = 1;  // Number of cells in the x-direction?
-            int const ny        = 1;  // Number of cells in the y-direction?
-            int const nz        = 1;  // Number of cells in the z-direction?
-            int const nGhost    = 0;  // Isn't actually used it appears
-            int const direction = 0;  // Which direction, 0=x, 1=y, 2=z
-            int const nFields   = 5;  // Total number of conserved fields
-
-            // Launch Parameters
-            dim3 const dimGrid (1,1,1);  // How many blocks in the grid
-            dim3 const dimBlock(1,1,1);  // How many threads per block
-
-            // Create the std::vector to store the fluxes and declare the device
-            // pointers
-            std::vector<Real> testFlux(5);
-            Real *devConservedLeft;
-            Real *devConservedRight;
-            Real *devTestFlux;
-
-            // Allocate device arrays and copy data
-            CudaSafeCall(cudaMalloc(&devConservedLeft,  nFields*sizeof(Real)));
-            CudaSafeCall(cudaMalloc(&devConservedRight, nFields*sizeof(Real)));
-            CudaSafeCall(cudaMalloc(&devTestFlux,       nFields*sizeof(Real)));
-
-            CudaSafeCall(cudaMemcpy(devConservedLeft,
-                                    stateLeft.data(),
-                                    nFields*sizeof(Real),
-                                    cudaMemcpyHostToDevice));
-            CudaSafeCall(cudaMemcpy(devConservedRight,
-                                    stateRight.data(),
-                                    nFields*sizeof(Real),
-                                    cudaMemcpyHostToDevice));
-
-            // Run kernel
-            hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA,
-                               dimGrid,
-                               dimBlock,
-                               0,
-                               0,
-                               devConservedLeft,   // the "left" interface
-                               devConservedRight,  // the "right" interface
-                               devTestFlux,
-                               nx,
-                               ny,
-                               nz,
-                               nGhost,
-                               gamma,
-                               direction,
-                               nFields);
-
-            CudaCheckError();
-            CudaSafeCall(cudaMemcpy(testFlux.data(),
-                                    devTestFlux,
-                                    nFields*sizeof(Real),
-                                    cudaMemcpyDeviceToHost));
-
-            // Make sure to sync with the device so we have the results
-            cudaDeviceSynchronize();
-            CudaCheckError();
-
-            return testFlux;
-        }
-        // =====================================================================
-
-        // =====================================================================
-        /*!
-         * \brief Check if the fluxes are correct
-         *
-         * \param[in] fiducialFlux The fiducial flux in conserved variables. In
-         * order the elements are: density, x-momentum, y-momentum, z-momentum,
-         * and energy.
-         * \param[in] testFlux The test flux in conserved variables. In order
-         * the elements are: density, x-momentum, y-momentum, z-momentum, and
-         * energy.
-         * \param[in] customOutput Any custom output the user would like to
-         * print. It will print after the default GTest output but before the
-         * values that failed are printed
-         */
-        void checkResults(std::vector<Real> const &fiducialFlux,
-                          std::vector<Real> const &testFlux,
-                          std::string const &customOutput = "")
-        {
-            // Field names
-            std::vector<std::string> const fieldNames {"Densities",
-                                                       "X Momentum",
-                                                       "Y Momentum",
-                                                       "Z Momentum",
-                                                       "Energies"};
-
-            ASSERT_TRUE(    (fiducialFlux.size() == testFlux.size())
-                        and (fiducialFlux.size() == fieldNames.size()))
-                        << "The fiducial flux, test flux, and field name vectors are not all the same length" << std::endl
-                        << "fiducialFlux.size() = " << fiducialFlux.size() << std::endl
-                        << "testFlux.size() = "     << testFlux.size()     << std::endl
-                        << "fieldNames.size() = "   << fieldNames.size()   << std::endl;
-
-            // Check for equality
-            for (size_t i = 0; i < fieldNames.size(); i++)
-            {
-                // Check for equality and if not equal return difference
-                double absoluteDiff;
-                int64_t ulpsDiff;
-
-                bool areEqual = testingUtilities::nearlyEqualDbl(fiducialFlux[i],
-                                                                 testFlux[i],
-                                                                 absoluteDiff,
-                                                                 ulpsDiff);
-                EXPECT_TRUE(areEqual)
-                    << std::endl << customOutput << std::endl
-                    << "There's a difference in "      << fieldNames[i]   << " Flux" << std::endl
-                    << "The fiducial value is:       " << fiducialFlux[i] << std::endl
-                    << "The test value is:           " << testFlux[i]     << std::endl
-                    << "The absolute difference is:  " << absoluteDiff    << std::endl
-                    << "The ULP difference is:       " << ulpsDiff        << std::endl;
-            }
-        }
-        // =====================================================================
-
-    };
-    // =========================================================================
-
-    // =========================================================================
-    // Testing Calculate_HLLC_Fluxes_CUDA
-    /*!
-    * \brief Test the HLLC solver with the input from the high pressure side of a
-    sod shock tube. Correct results are hard coded into this test. Similar tests
-    do not need to be this verbose, simply passing values to the kernel call
-    should be sufficient in most cases
-    *
-    */
-    TEST_F(tHYDROCalculateHLLCFluxesCUDA,        // Test suite name
-           HighPressureSideExpectCorrectOutput)  // Test name
-    {
-        // Physical Values
-        Real const density   = 1.0;
-        Real const pressure  = 1.0;
-        Real const velocityX = 0.0;
-        Real const velocityY = 0.0;
-        Real const velocityZ = 0.0;
-        Real const momentumX = density * velocityX;
-        Real const momentumY = density * velocityY;
-        Real const momentumZ = density * velocityZ;
-        Real const gamma     = 1.4;
-        Real const energy    = (pressure/(gamma - 1)) + 0.5 * density
-                               * (velocityX*velocityX
-                                  + velocityY*velocityY
-                                  + velocityZ*velocityZ);
-
-        std::vector<Real> const state{density,
-                                      momentumX,
-                                      momentumY,
-                                      momentumZ,
-                                      energy};
-        std::vector<Real> const fiducialFluxes{0, 1, 0, 0, 0};
-
-        // Compute the fluxes
-        std::vector<Real> const testFluxes = computeFluxes(state,  // Left state
-                                                           state,  // Right state
-                                                           gamma); // Adiabatic Index
-
-        // Check for correctness
-        checkResults(fiducialFluxes, testFluxes);
+// =========================================================================
+/*!
+ * \brief Test fixture for simple testing of the HLLC Riemann Solver.
+   Effectively takes the left state, right state, fiducial fluxes, and
+   custom user output then performs all the required running and testing
+ *
+ */
+// NOLINTNEXTLINE(readability-identifier-naming)
+class tHYDROCalculateHLLCFluxesCUDA : public ::testing::Test
+{
+ protected:
+  // =====================================================================
+  /*!
+   * \brief Compute and return the HLLC fluxes
+   *
+   * \param[in] leftState The state on the left side in conserved
+   * variables. In order the elements are: density, x-momentum,
+   * y-momentum, z-momentum, and energy.
+   * \param[in] rightState The state on the right side in conserved
+   * variables. In order the elements are: density, x-momentum,
+   * y-momentum, z-momentum, and energy.
+   * \param[in] gamma The adiabatic index
+   * \return std::vector<double>
+   */
+  std::vector<Real> Compute_Fluxes(std::vector<Real> const &stateLeft, std::vector<Real> const &stateRight,
+                                   Real const &gamma)
+  {
+    // Simulation Paramters
+    int const nx        = 1;  // Number of cells in the x-direction?
+    int const ny        = 1;  // Number of cells in the y-direction?
+    int const nz        = 1;  // Number of cells in the z-direction?
+    int const nGhost    = 0;  // Isn't actually used it appears
+    int const direction = 0;  // Which direction, 0=x, 1=y, 2=z
+    int const nFields   = 5;  // Total number of conserved fields
+
+    // Launch Parameters
+    dim3 const dimGrid(1, 1, 1);   // How many blocks in the grid
+    dim3 const dimBlock(1, 1, 1);  // How many threads per block
+
+    // Create the std::vector to store the fluxes and declare the device
+    // pointers
+    std::vector<Real> testFlux(5);
+    Real *devConservedLeft;
+    Real *devConservedRight;
+    Real *devTestFlux;
+
+    // Allocate device arrays and copy data
+    GPU_Error_Check(cudaMalloc(&devConservedLeft, nFields * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc(&devConservedRight, nFields * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc(&devTestFlux, nFields * sizeof(Real)));
+
+    GPU_Error_Check(cudaMemcpy(devConservedLeft, stateLeft.data(), nFields * sizeof(Real), cudaMemcpyHostToDevice));
+    GPU_Error_Check(cudaMemcpy(devConservedRight, stateRight.data(), nFields * sizeof(Real), cudaMemcpyHostToDevice));
+
+    // Run kernel
+    hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0,
+                       devConservedLeft,   // the "left" interface
+                       devConservedRight,  // the "right" interface
+                       devTestFlux, nx, ny, nz, nGhost, gamma, direction, nFields);
+
+    GPU_Error_Check();
+    GPU_Error_Check(cudaMemcpy(testFlux.data(), devTestFlux, nFields * sizeof(Real), cudaMemcpyDeviceToHost));
+
+    // Make sure to sync with the device so we have the results
+    cudaDeviceSynchronize();
+    GPU_Error_Check();
+
+    return testFlux;
+  }
+  // =====================================================================
+
+  // =====================================================================
+  /*!
+   * \brief Check if the fluxes are correct
+   *
+   * \param[in] fiducialFlux The fiducial flux in conserved variables. In
+   * order the elements are: density, x-momentum, y-momentum, z-momentum,
+   * and energy.
+   * \param[in] testFlux The test flux in conserved variables. In order
+   * the elements are: density, x-momentum, y-momentum, z-momentum, and
+   * energy.
+   * \param[in] customOutput Any custom output the user would like to
+   * print. It will print after the default GTest output but before the
+   * values that failed are printed
+   */
+  void Check_Results(std::vector<Real> const &fiducialFlux, std::vector<Real> const &testFlux,
+                     std::string const &customOutput = "")
+  {
+    // Field names
+    std::vector<std::string> const fieldNames{"Densities", "X Momentum", "Y Momentum", "Z Momentum", "Energies"};
+
+    ASSERT_TRUE((fiducialFlux.size() == testFlux.size()) and (fiducialFlux.size() == fieldNames.size()))
+        << "The fiducial flux, test flux, and field name vectors are not all "
+           "the same length"
+        << std::endl
+        << "fiducialFlux.size() = " << fiducialFlux.size() << std::endl
+        << "testFlux.size() = " << testFlux.size() << std::endl
+        << "fieldNames.size() = " << fieldNames.size() << std::endl;
+
+    // Check for equality
+    for (size_t i = 0; i < fieldNames.size(); i++) {
+      // Check for equality and if not equal return difference
+      double absoluteDiff;
+      int64_t ulpsDiff;
+
+      bool areEqual = testing_utilities::nearlyEqualDbl(fiducialFlux[i], testFlux[i], absoluteDiff, ulpsDiff);
+      EXPECT_TRUE(areEqual) << std::endl
+                            << customOutput << std::endl
+                            << "There's a difference in " << fieldNames[i] << " Flux" << std::endl
+                            << "The fiducial value is:       " << fiducialFlux[i] << std::endl
+                            << "The test value is:           " << testFlux[i] << std::endl
+                            << "The absolute difference is:  " << absoluteDiff << std::endl
+                            << "The ULP difference is:       " << ulpsDiff << std::endl;
     }
-    // =========================================================================
+  }
+  // =====================================================================
+};
+// =========================================================================
+
+// =========================================================================
+// Testing Calculate_HLLC_Fluxes_CUDA
+/*!
+* \brief Test the HLLC solver with the input from the high pressure side of a
+sod shock tube. Correct results are hard coded into this test. Similar tests
+do not need to be this verbose, simply passing values to the kernel call
+should be sufficient in most cases
+*
+*/
+TEST_F(tHYDROCalculateHLLCFluxesCUDA,        // Test suite name
+       HighPressureSideExpectCorrectOutput)  // Test name
+{
+  // Physical Values
+  Real const density   = 1.0;
+  Real const pressure  = 1.0;
+  Real const velocityX = 0.0;
+  Real const velocityY = 0.0;
+  Real const velocityZ = 0.0;
+  Real const momentumX = density * velocityX;
+  Real const momentumY = density * velocityY;
+  Real const momentumZ = density * velocityZ;
+  Real const gamma     = 1.4;
+  Real const energy    = (pressure / (gamma - 1)) +
+                      0.5 * density * (velocityX * velocityX + velocityY * velocityY + velocityZ * velocityZ);
+
+  std::vector<Real> const state{density, momentumX, momentumY, momentumZ, energy};
+  std::vector<Real> const fiducialFluxes{0, 1, 0, 0, 0};
+
+  // Compute the fluxes
+  std::vector<Real> const testFluxes = Compute_Fluxes(state,   // Left state
+                                                      state,   // Right state
+                                                      gamma);  // Adiabatic Index
+
+  // Check for correctness
+  Check_Results(fiducialFluxes, testFluxes);
+}
+// =========================================================================
 
 #endif
diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu
index 489647bdb..80d6902c7 100644
--- a/src/riemann_solvers/hlld_cuda.cu
+++ b/src/riemann_solvers/hlld_cuda.cu
@@ -1,915 +1,513 @@
 /*!
  * \file hlld_cuda.cu
  * \author Robert 'Bob' Caddy (rvc@pitt.edu)
- * \brief Contains the implementation of the HLLD solver
+ * \brief Contains the implementation of the HLLD solver from Miyoshi & Kusano
+ * 2005 "A multi-state HLL approximate Riemann solver for ideal
+ * magnetohydrodynamics", hereafter referred to as M&K 2005
  *
-*/
+ */
 
 // External Includes
 
 // Local Includes
-#include "../utils/gpu.hpp"
 #include "../global/global.h"
 #include "../global/global_cuda.h"
-#include "../utils/mhd_utilities.h"
+#include "../grid/grid_enum.h"
 #include "../riemann_solvers/hlld_cuda.h"
+#include "../utils/cuda_utilities.h"
+#include "../utils/gpu.hpp"
+#include "../utils/hydro_utilities.h"
+#include "../utils/math_utilities.h"
+#include "../utils/mhd_utilities.h"
 
-#ifdef DE //PRESSURE_DE
-    #include "../utils/hydro_utilities.h"
-#endif // DE
-
-#ifdef CUDA
-    // =========================================================================
-    __global__ void Calculate_HLLD_Fluxes_CUDA(Real *dev_bounds_L,
-                                               Real *dev_bounds_R,
-                                               Real *dev_flux,
-                                               int nx,
-                                               int ny,
-                                               int nz,
-                                               int n_ghost,
-                                               Real gamma,
-                                               int direction,
-                                               int n_fields)
-    {
-        // get a thread index
-        int blockId  = blockIdx.x + blockIdx.y*gridDim.x;
-        int threadId = threadIdx.x + blockId * blockDim.x;
-        int zid = threadId / (nx*ny);
-        int yid = (threadId - zid*nx*ny) / nx;
-        int xid = threadId - zid*nx*ny - yid*nx;
-
-        // Number of cells
-        int n_cells = nx*ny*nz;
-
-        // Offsets & indices
-        int o1, o2, o3;
-        if (direction==0) {o1 = 1; o2 = 2; o3 = 3;}
-        if (direction==1) {o1 = 2; o2 = 3; o3 = 1;}
-        if (direction==2) {o1 = 3; o2 = 1; o3 = 2;}
-
-        // Thread guard to avoid overrun
-        if (xid < nx and yid < ny and zid < nz)
-        {
-            // ============================
-            // Retrieve conserved variables
-            // ============================
-            // Left interface
-            Real densityL   = dev_bounds_L[threadId];
-            Real momentumXL = dev_bounds_L[threadId + n_cells * o1];
-            Real momentumYL = dev_bounds_L[threadId + n_cells * o2];
-            Real momentumZL = dev_bounds_L[threadId + n_cells * o3];
-            Real energyL    = dev_bounds_L[threadId + n_cells * 4];
-            Real magneticXL = dev_bounds_L[threadId + n_cells * (o1 + 4 + NSCALARS)];
-            Real magneticYL = dev_bounds_L[threadId + n_cells * (o2 + 4 + NSCALARS)];
-            Real magneticZL = dev_bounds_L[threadId + n_cells * (o3 + 4 + NSCALARS)];
-
-            #ifdef SCALAR
-                Real scalarConservedL[NSCALARS];
-                for (int i=0; i<NSCALARS; i++)
-                {
-                    scalarConservedL[i] = dev_bounds_L[threadId + n_cells * (5+i)];
-                }
-            #endif // SCALAR
-            #ifdef DE
-                Real thermalEnergyConservedL = dev_bounds_L[threadId + n_cells * (n_fields-1)];
-            #endif // DE
-
-            // Right interface
-            Real densityR   = dev_bounds_R[threadId];
-            Real momentumXR = dev_bounds_R[threadId + n_cells * o1];
-            Real momentumYR = dev_bounds_R[threadId + n_cells * o2];
-            Real momentumZR = dev_bounds_R[threadId + n_cells * o3];
-            Real energyR    = dev_bounds_R[threadId + n_cells * 4];
-            Real magneticXR = dev_bounds_R[threadId + n_cells * (o1 + 4 + NSCALARS)];
-            Real magneticYR = dev_bounds_R[threadId + n_cells * (o2 + 4 + NSCALARS)];
-            Real magneticZR = dev_bounds_R[threadId + n_cells * (o3 + 4 + NSCALARS)];
-
-            #ifdef SCALAR
-                Real scalarConservedR[NSCALARS];
-                for (int i=0; i<NSCALARS; i++)
-                {
-                    scalarConservedR[i] = dev_bounds_R[threadId + n_cells * (5+i)];
-                }
-            #endif // SCALAR
-            #ifdef DE
-                Real thermalEnergyConservedR = dev_bounds_R[threadId + n_cells * (n_fields-1)];
-            #endif // DE
-
-            // Check for unphysical values
-            densityL = fmax(densityL, (Real) TINY_NUMBER);
-            densityR = fmax(densityR, (Real) TINY_NUMBER);
-            energyL  = fmax(energyL,  (Real) TINY_NUMBER);
-            energyR  = fmax(energyR,  (Real) TINY_NUMBER);
-
-            // ============================
-            // Compute primitive variables
-            // ============================
-            // Left interface
-            Real const velocityXL = momentumXL / densityL;
-            Real const velocityYL = momentumYL / densityL;
-            Real const velocityZL = momentumZL / densityL;
-
-            #ifdef DE //PRESSURE_DE
-                Real const energyKineticL = 0.5 * densityL
-                    * _hlldInternal::_dotProduct(velocityXL, velocityYL, velocityZL,
-                                                 velocityXL, velocityYL, velocityZL);
-
-                Real const energyMagneticL = 0.5
-                    * _hlldInternal::_dotProduct(magneticXL, magneticYL, magneticZL,
-                                                 magneticXL, magneticYL, magneticZL);
-
-                Real const gasPressureL   = fmax(hydro_utilities::Get_Pressure_From_DE(energyL,
-                                                                      energyL - energyKineticL - energyMagneticL,
-                                                                      thermalEnergyConservedL,
-                                                                      gamma),
-                                                 (Real) TINY_NUMBER);
-            #else
-                // Note that this function does the positive pressure check
-                // internally
-                Real const gasPressureL  = mhdUtils::computeGasPressure(energyL,
-                                                                        densityL,
-                                                                        momentumXL,
-                                                                        momentumYL,
-                                                                        momentumZL,
-                                                                        magneticXL,
-                                                                        magneticYL,
-                                                                        magneticZL,
-                                                                        gamma);
-            #endif //PRESSURE_DE
-
-            Real const totalPressureL = mhdUtils::computeTotalPressure(gasPressureL,
-                                                                       magneticXL,
-                                                                       magneticYL,
-                                                                       magneticZL);
-
-            // Right interface
-            Real const velocityXR = momentumXR / densityR;
-            Real const velocityYR = momentumYR / densityR;
-            Real const velocityZR = momentumZR / densityR;
-
-            #ifdef DE //PRESSURE_DE
-                Real const energyKineticR = 0.5 * densityR
-                    * _hlldInternal::_dotProduct(velocityXR, velocityYR, velocityZR,
-                                                 velocityXR, velocityYR, velocityZR);
-
-                Real const energyMagneticR = 0.5
-                    * _hlldInternal::_dotProduct(magneticXR, magneticYR, magneticZR,
-                                                 magneticXR, magneticYR, magneticZR);
-
-                Real const gasPressureR   = fmax(hydro_utilities::Get_Pressure_From_DE(energyR,
-                                                                      energyR - energyKineticR - energyMagneticR,
-                                                                      thermalEnergyConservedR,
-                                                                      gamma),
-                                                 (Real) TINY_NUMBER);
-            #else
-                // Note that this function does the positive pressure check
-                // internally
-                Real const gasPressureR  = mhdUtils::computeGasPressure(energyR,
-                                                                  densityR,
-                                                                  momentumXR,
-                                                                  momentumYR,
-                                                                  momentumZR,
-                                                                  magneticXR,
-                                                                  magneticYR,
-                                                                  magneticZR,
-                                                                  gamma);
-            #endif //PRESSURE_DE
-
-            Real const totalPressureR = mhdUtils::computeTotalPressure(gasPressureR,
-                                                                 magneticXR,
-                                                                 magneticYR,
-                                                                 magneticZR);
-
-            // Compute the approximate wave speeds and density in the star
-            // regions
-            Real speedL, speedR, speedM, speedStarL, speedStarR, densityStarL, densityStarR;
-            _hlldInternal::_approximateWaveSpeeds(densityL,
-                                                  momentumXL,
-                                                  momentumYL,
-                                                  momentumZL,
-                                                  velocityXL,
-                                                  velocityYL,
-                                                  velocityZL,
-                                                  gasPressureL,
-                                                  totalPressureL,
-                                                  magneticXL,
-                                                  magneticYL,
-                                                  magneticZL,
-                                                  densityR,
-                                                  momentumXR,
-                                                  momentumYR,
-                                                  momentumZR,
-                                                  velocityXR,
-                                                  velocityYR,
-                                                  velocityZR,
-                                                  gasPressureR,
-                                                  totalPressureR,
-                                                  magneticXR,
-                                                  magneticYR,
-                                                  magneticZR,
-                                                  gamma,
-                                                  speedL,
-                                                  speedR,
-                                                  speedM,
-                                                  speedStarL,
-                                                  speedStarR,
-                                                  densityStarL,
-                                                  densityStarR);
-
-            // =================================================================
-            // Compute the fluxes in the non-star states
-            // =================================================================
-            // Left state
-            Real densityFluxL, momentumFluxXL, momentumFluxYL, momentumFluxZL,
-                 magneticFluxYL, magneticFluxZL, energyFluxL;
-            _hlldInternal::_nonStarFluxes(momentumXL,
-                                          velocityXL,
-                                          velocityYL,
-                                          velocityZL,
-                                          totalPressureL,
-                                          energyL,
-                                          magneticXL,
-                                          magneticYL,
-                                          magneticZL,
-                                          densityFluxL,
-                                          momentumFluxXL,
-                                          momentumFluxYL,
-                                          momentumFluxZL,
-                                          magneticFluxYL,
-                                          magneticFluxZL,
-                                          energyFluxL);
-
-            // If we're in the L state then assign fluxes and return.
-            // In this state the flow is supersonic
-            if (speedL >= 0.0)
-            {
-                _hlldInternal::_returnFluxes(threadId, o1, o2, o3, n_cells,
-                                             dev_flux,
-                                             densityFluxL,
-                                             momentumFluxXL, momentumFluxYL, momentumFluxZL,
-                                             energyFluxL,
-                                             magneticFluxYL, magneticFluxZL);
-                #ifdef SCALAR
-                    for (int i=0; i<NSCALARS; i++)
-                    {
-                        dev_flux[(5+i)*n_cells+threadId]  = (scalarConservedL[i] / densityL) * densityFluxL;
-                    }
-                #endif  // SCALAR
-                #ifdef DE
-                    dev_flux[(n_fields-1)*n_cells+threadId]  = (thermalEnergyConservedL / densityL) * densityFluxL;
-                #endif  // DE
-                return;
-            }
-            // Right state
-            Real densityFluxR, momentumFluxXR, momentumFluxYR, momentumFluxZR,
-                 magneticFluxYR, magneticFluxZR, energyFluxR;
-            _hlldInternal::_nonStarFluxes(momentumXR,
-                                          velocityXR,
-                                          velocityYR,
-                                          velocityZR,
-                                          totalPressureR,
-                                          energyR,
-                                          magneticXR,
-                                          magneticYR,
-                                          magneticZR,
-                                          densityFluxR,
-                                          momentumFluxXR,
-                                          momentumFluxYR,
-                                          momentumFluxZR,
-                                          magneticFluxYR,
-                                          magneticFluxZR,
-                                          energyFluxR);
-
-            // If we're in the R state then assign fluxes and return.
-            // In this state the flow is supersonic
-            if (speedR <= 0.0)
-            {
-                _hlldInternal::_returnFluxes(threadId, o1, o2, o3, n_cells,
-                                             dev_flux,
-                                             densityFluxR,
-                                             momentumFluxXR, momentumFluxYR, momentumFluxZR,
-                                             energyFluxR,
-                                             magneticFluxYR, magneticFluxZR);
-                #ifdef SCALAR
-                    for (int i=0; i<NSCALARS; i++)
-                    {
-                        dev_flux[(5+i)*n_cells+threadId]  = (scalarConservedR[i] / densityR) * densityFluxR;
-                    }
-                #endif  // SCALAR
-                #ifdef DE
-                    dev_flux[(n_fields-1)*n_cells+threadId]  = (thermalEnergyConservedR / densityR) * densityFluxR;
-                #endif  // DE
-                return;
-            }
-
-            // =================================================================
-            // Compute the fluxes in the star states
-            // =================================================================
-            // Shared quantity
-            // note that velocityStarX = speedM
-            Real totalPressureStar = totalPressureL + densityL
-                                                      * (speedL - velocityXL)
-                                                      * (speedM - velocityXL);
-
-            // Left star state
-            Real velocityStarYL, velocityStarZL,
-                 energyStarL, magneticStarYL, magneticStarZL,
-                 densityStarFluxL,
-                 momentumStarFluxXL, momentumStarFluxYL, momentumStarFluxZL,
-                 magneticStarFluxYL, magneticStarFluxZL, energyStarFluxL;
-            _hlldInternal::_starFluxes(speedM,
-                                       speedL,
-                                       densityL,
-                                       velocityXL,
-                                       velocityYL,
-                                       velocityZL,
-                                       momentumXL,
-                                       momentumYL,
-                                       momentumZL,
-                                       energyL,
-                                       totalPressureL,
-                                       magneticXL,
-                                       magneticYL,
-                                       magneticZL,
-                                       densityStarL,
-                                       totalPressureStar,
-                                       densityFluxL,
-                                       momentumFluxXL,
-                                       momentumFluxYL,
-                                       momentumFluxZL,
-                                       energyFluxL,
-                                       magneticFluxYL,
-                                       magneticFluxZL,
-                                       velocityStarYL,
-                                       velocityStarZL,
-                                       energyStarL,
-                                       magneticStarYL,
-                                       magneticStarZL,
-                                       densityStarFluxL,
-                                       momentumStarFluxXL,
-                                       momentumStarFluxYL,
-                                       momentumStarFluxZL,
-                                       energyStarFluxL,
-                                       magneticStarFluxYL,
-                                       magneticStarFluxZL);
-
-            // If we're in the L* state then assign fluxes and return.
-            // In this state the flow is subsonic
-            if (speedStarL >= 0.0)
-            {
-                _hlldInternal::_returnFluxes(threadId, o1, o2, o3, n_cells,
-                                             dev_flux,
-                                             densityStarFluxL,
-                                             momentumStarFluxXL, momentumStarFluxYL, momentumStarFluxZL,
-                                             energyStarFluxL,
-                                             magneticStarFluxYL, magneticStarFluxZL);
-                #ifdef SCALAR
-                    for (int i=0; i<NSCALARS; i++)
-                    {
-                        dev_flux[(5+i)*n_cells+threadId] = (scalarConservedL[i] / densityL) * densityStarFluxL;
-                    }
-                #endif  // SCALAR
-                #ifdef DE
-                    dev_flux[(n_fields-1)*n_cells+threadId]  = (thermalEnergyConservedL / densityL) * densityStarFluxL;
-                #endif  // DE
-                return;
-            }
-
-            // Right star state
-            Real velocityStarYR, velocityStarZR,
-                 energyStarR, magneticStarYR, magneticStarZR,
-                 densityStarFluxR,
-                 momentumStarFluxXR, momentumStarFluxYR, momentumStarFluxZR,
-                 magneticStarFluxYR, magneticStarFluxZR, energyStarFluxR;
-            _hlldInternal::_starFluxes(speedM,
-                                       speedR,
-                                       densityR,
-                                       velocityXR,
-                                       velocityYR,
-                                       velocityZR,
-                                       momentumXR,
-                                       momentumYR,
-                                       momentumZR,
-                                       energyR,
-                                       totalPressureR,
-                                       magneticXR,
-                                       magneticYR,
-                                       magneticZR,
-                                       densityStarR,
-                                       totalPressureStar,
-                                       densityFluxR,
-                                       momentumFluxXR,
-                                       momentumFluxYR,
-                                       momentumFluxZR,
-                                       energyFluxR,
-                                       magneticFluxYR,
-                                       magneticFluxZR,
-                                       velocityStarYR,
-                                       velocityStarZR,
-                                       energyStarR,
-                                       magneticStarYR,
-                                       magneticStarZR,
-                                       densityStarFluxR,
-                                       momentumStarFluxXR,
-                                       momentumStarFluxYR,
-                                       momentumStarFluxZR,
-                                       energyStarFluxR,
-                                       magneticStarFluxYR,
-                                       magneticStarFluxZR);
-
-            // If we're in the R* state then assign fluxes and return.
-            // In this state the flow is subsonic
-            if (speedStarR <= 0.0)
-            {
-                _hlldInternal::_returnFluxes(threadId, o1, o2, o3, n_cells,
-                                             dev_flux,
-                                             densityStarFluxR,
-                                             momentumStarFluxXR, momentumStarFluxYR, momentumStarFluxZR,
-                                             energyStarFluxR,
-                                             magneticStarFluxYR, magneticStarFluxZR);
-                #ifdef SCALAR
-                    for (int i=0; i<NSCALARS; i++)
-                    {
-                        dev_flux[(5+i)*n_cells+threadId] = (scalarConservedR[i] / densityR) * densityStarFluxR;
-                    }
-                #endif  // SCALAR
-                #ifdef DE
-                    dev_flux[(n_fields-1)*n_cells+threadId]  = (thermalEnergyConservedR / densityR) * densityStarFluxR;
-                #endif  // DE
-                return;
-            }
-
-            // =================================================================
-            // Compute the fluxes in the double star states
-            // =================================================================
-            Real velocityDoubleStarY, velocityDoubleStarZ,
-                 magneticDoubleStarY, magneticDoubleStarZ,
-                 energyDoubleStarL, energyDoubleStarR;
-            _hlldInternal::_doubleStarState(speedM,
-                                            magneticXL,
-                                            totalPressureStar,
-                                            densityStarL,
-                                            velocityStarYL,
-                                            velocityStarZL,
-                                            energyStarL,
-                                            magneticStarYL,
-                                            magneticStarZL,
-                                            densityStarR,
-                                            velocityStarYR,
-                                            velocityStarZR,
-                                            energyStarR,
-                                            magneticStarYR,
-                                            magneticStarZR,
-                                            velocityDoubleStarY,
-                                            velocityDoubleStarZ,
-                                            magneticDoubleStarY,
-                                            magneticDoubleStarZ,
-                                            energyDoubleStarL,
-                                            energyDoubleStarR);
-
-            // Compute and return L** fluxes
-            if (speedM >= 0.0)
-            {
-                Real momentumDoubleStarFluxX, momentumDoubleStarFluxY, momentumDoubleStarFluxZ,
-                     energyDoubleStarFlux,
-                     magneticDoubleStarFluxY, magneticDoubleStarFluxZ;
-                _hlldInternal::_doubleStarFluxes(speedStarL,
-                                                 momentumStarFluxXL,
-                                                 momentumStarFluxYL,
-                                                 momentumStarFluxZL,
-                                                 energyStarFluxL,
-                                                 magneticStarFluxYL,
-                                                 magneticStarFluxZL,
-                                                 densityStarL,
-                                                 speedM,
-                                                 velocityStarYL,
-                                                 velocityStarZL,
-                                                 energyStarL,
-                                                 magneticStarYL,
-                                                 magneticStarZL,
-                                                 speedM,
-                                                 velocityDoubleStarY,
-                                                 velocityDoubleStarZ,
-                                                 energyDoubleStarL,
-                                                 magneticDoubleStarY,
-                                                 magneticDoubleStarZ,
-                                                 momentumDoubleStarFluxX,
-                                                 momentumDoubleStarFluxY,
-                                                 momentumDoubleStarFluxZ,
-                                                 energyDoubleStarFlux,
-                                                 magneticDoubleStarFluxY,
-                                                 magneticDoubleStarFluxZ);
-
-                _hlldInternal::_returnFluxes(threadId, o1, o2, o3, n_cells,
-                                             dev_flux,
-                                             densityStarFluxL,
-                                             momentumDoubleStarFluxX, momentumDoubleStarFluxY, momentumDoubleStarFluxZ,
-                                             energyDoubleStarFlux,
-                                             magneticDoubleStarFluxY, magneticDoubleStarFluxZ);
-
-                #ifdef SCALAR
-                    // Return the passive scalar fluxes
-                    for (int i=0; i<NSCALARS; i++)
-                    {
-                        dev_flux[(5+i)*n_cells+threadId] = (scalarConservedL[i] / densityL) * densityStarFluxL;
-                    }
-                #endif // SCALAR
-                #ifdef DE
-                    dev_flux[(n_fields-1)*n_cells+threadId]  = (thermalEnergyConservedL / densityL) * densityStarFluxL;
-                #endif  // DE
-                return;
-            }
-            // Compute and return R** fluxes
-            else if (speedStarR >= 0.0)
-            {
-                Real momentumDoubleStarFluxX, momentumDoubleStarFluxY, momentumDoubleStarFluxZ,
-                     energyDoubleStarFlux,
-                     magneticDoubleStarFluxY, magneticDoubleStarFluxZ;
-                _hlldInternal::_doubleStarFluxes(speedStarR,
-                                                 momentumStarFluxXR,
-                                                 momentumStarFluxYR,
-                                                 momentumStarFluxZR,
-                                                 energyStarFluxR,
-                                                 magneticStarFluxYR,
-                                                 magneticStarFluxZR,
-                                                 densityStarR,
-                                                 speedM,
-                                                 velocityStarYR,
-                                                 velocityStarZR,
-                                                 energyStarR,
-                                                 magneticStarYR,
-                                                 magneticStarZR,
-                                                 speedM,
-                                                 velocityDoubleStarY,
-                                                 velocityDoubleStarZ,
-                                                 energyDoubleStarR,
-                                                 magneticDoubleStarY,
-                                                 magneticDoubleStarZ,
-                                                 momentumDoubleStarFluxX,
-                                                 momentumDoubleStarFluxY,
-                                                 momentumDoubleStarFluxZ,
-                                                 energyDoubleStarFlux,
-                                                 magneticDoubleStarFluxY,
-                                                 magneticDoubleStarFluxZ);
-
-                _hlldInternal::_returnFluxes(threadId, o1, o2, o3, n_cells,
-                                             dev_flux,
-                                             densityStarFluxR,
-                                             momentumDoubleStarFluxX, momentumDoubleStarFluxY, momentumDoubleStarFluxZ,
-                                             energyDoubleStarFlux,
-                                             magneticDoubleStarFluxY, magneticDoubleStarFluxZ);
-
-                #ifdef SCALAR
-                    // Return the passive scalar fluxes
-                    for (int i=0; i<NSCALARS; i++)
-                    {
-                        dev_flux[(5+i)*n_cells+threadId] = (scalarConservedR[i] / densityR) * densityStarFluxR;
-                    }
-                #endif // SCALAR
-                #ifdef DE
-                    dev_flux[(n_fields-1)*n_cells+threadId]  = (thermalEnergyConservedR / densityR) * densityStarFluxR;
-                #endif  // DE
-                return;
-            }
-        } // End thread guard
-    };
-    // =========================================================================
-
-    namespace _hlldInternal
-    {
-        // =====================================================================
-        __device__ __host__ void _approximateWaveSpeeds(Real const &densityL,
-                                                        Real const &momentumXL,
-                                                        Real const &momentumYL,
-                                                        Real const &momentumZL,
-                                                        Real const &velocityXL,
-                                                        Real const &velocityYL,
-                                                        Real const &velocityZL,
-                                                        Real const &gasPressureL,
-                                                        Real const &totalPressureL,
-                                                        Real const &magneticXL,
-                                                        Real const &magneticYL,
-                                                        Real const &magneticZL,
-                                                        Real const &densityR,
-                                                        Real const &momentumXR,
-                                                        Real const &momentumYR,
-                                                        Real const &momentumZR,
-                                                        Real const &velocityXR,
-                                                        Real const &velocityYR,
-                                                        Real const &velocityZR,
-                                                        Real const &gasPressureR,
-                                                        Real const &totalPressureR,
-                                                        Real const &magneticXR,
-                                                        Real const &magneticYR,
-                                                        Real const &magneticZR,
-                                                        Real const &gamma,
-                                                        Real &speedL,
-                                                        Real &speedR,
-                                                        Real &speedM,
-                                                        Real &speedStarL,
-                                                        Real &speedStarR,
-                                                        Real &densityStarL,
-                                                        Real &densityStarR)
-        {
-            // Get the fast magnetosonic wave speeds
-            Real magSonicL = mhdUtils::fastMagnetosonicSpeed(densityL,
-                                                             gasPressureL,
-                                                             magneticXL,
-                                                             magneticYL,
-                                                             magneticZL,
-                                                             gamma);
-            Real magSonicR = mhdUtils::fastMagnetosonicSpeed(densityR,
-                                                             gasPressureR,
-                                                             magneticXR,
-                                                             magneticYR,
-                                                             magneticZR,
-                                                             gamma);
-
-            // Compute the S_L and S_R wave speeds.
-            // Version suggested by Miyoshi & Kusano 2005 and used in Athena
-            Real magSonicMax = fmax(magSonicL, magSonicR);
-            speedL = fmin(velocityXL, velocityXR) - magSonicMax;
-            speedR = fmax(velocityXL, velocityXR) + magSonicMax;
-
-            // Compute the S_M wave speed
-            speedM = // Numerator
-                          ( momentumXR * (speedR - velocityXR)
-                          - momentumXL * (speedL - velocityXL)
-                          + (totalPressureL - totalPressureR))
-                          /
-                          // Denominator
-                          ( densityR * (speedR - velocityXR)
-                          - densityL * (speedL - velocityXL));
-
-            // Compute the densities in the star state
-            densityStarL = densityL * (speedL - velocityXL) / (speedL - speedM);
-            densityStarR = densityR * (speedR - velocityXR) / (speedR - speedM);
-
-            // Compute the S_L^* and S_R^* wave speeds
-            speedStarL = speedM - mhdUtils::alfvenSpeed(magneticXL, densityStarL);
-            speedStarR = speedM + mhdUtils::alfvenSpeed(magneticXR, densityStarR);
-        }
-        // =====================================================================
-
-        // =====================================================================
-        __device__ __host__ void _nonStarFluxes(Real const &momentumX,
-                                                Real const &velocityX,
-                                                Real const &velocityY,
-                                                Real const &velocityZ,
-                                                Real const &totalPressure,
-                                                Real const &energy,
-                                                Real const &magneticX,
-                                                Real const &magneticY,
-                                                Real const &magneticZ,
-                                                Real &densityFlux,
-                                                Real &momentumFluxX,
-                                                Real &momentumFluxY,
-                                                Real &momentumFluxZ,
-                                                Real &magneticFluxY,
-                                                Real &magneticFluxZ,
-                                                Real &energyFlux)
-        {
-            densityFlux   = momentumX;
-
-            momentumFluxX = momentumX * velocityX + totalPressure - magneticX * magneticX;
-            momentumFluxY = momentumX * velocityY - magneticX * magneticY;
-            momentumFluxZ = momentumX * velocityZ - magneticX * magneticZ;
-
-            magneticFluxY = magneticY * velocityX - magneticX * velocityY;
-            magneticFluxZ = magneticZ * velocityX - magneticX * velocityZ;
-
-            // Group transverse terms for FP associative symmetry
-            energyFlux    = velocityX * (energy + totalPressure) - magneticX
-                            * (velocityX * magneticX
-                               + ((velocityY * magneticY)
-                               + (velocityZ * magneticZ)));
-        }
-        // =====================================================================
-
-        // =====================================================================
-        __device__ __host__  void _returnFluxes(int const &threadId,
-                                                int const &o1,
-                                                int const &o2,
-                                                int const &o3,
-                                                int const &n_cells,
-                                                Real *dev_flux,
-                                                Real const &densityFlux,
-                                                Real const &momentumFluxX,
-                                                Real const &momentumFluxY,
-                                                Real const &momentumFluxZ,
-                                                Real const &energyFlux,
-                                                Real const &magneticFluxY,
-                                                Real const &magneticFluxZ)
-        {
-            dev_flux[threadId]                                 = densityFlux;
-            dev_flux[threadId + n_cells * o1]                  = momentumFluxX;
-            dev_flux[threadId + n_cells * o2]                  = momentumFluxY;
-            dev_flux[threadId + n_cells * o3]                  = momentumFluxZ;
-            dev_flux[threadId + n_cells * 4]                   = energyFlux;
-            dev_flux[threadId + n_cells * (o2 + 4 + NSCALARS)] = magneticFluxY;
-            dev_flux[threadId + n_cells * (o3 + 4 + NSCALARS)] = magneticFluxZ;
-        }
-        // =====================================================================
-
-        // =====================================================================
-        __device__ __host__ void _starFluxes(Real const &speedM,
-                                             Real const &speedSide,
-                                             Real const &density,
-                                             Real const &velocityX,
-                                             Real const &velocityY,
-                                             Real const &velocityZ,
-                                             Real const &momentumX,
-                                             Real const &momentumY,
-                                             Real const &momentumZ,
-                                             Real const &energy,
-                                             Real const &totalPressure,
-                                             Real const &magneticX,
-                                             Real const &magneticY,
-                                             Real const &magneticZ,
-                                             Real const &densityStar,
-                                             Real const &totalPressureStar,
-                                             Real const &densityFlux,
-                                             Real const &momentumFluxX,
-                                             Real const &momentumFluxY,
-                                             Real const &momentumFluxZ,
-                                             Real const &energyFlux,
-                                             Real const &magneticFluxY,
-                                             Real const &magneticFluxZ,
-                                             Real &velocityStarY,
-                                             Real &velocityStarZ,
-                                             Real &energyStar,
-                                             Real &magneticStarY,
-                                             Real &magneticStarZ,
-                                             Real &densityStarFlux,
-                                             Real &momentumStarFluxX,
-                                             Real &momentumStarFluxY,
-                                             Real &momentumStarFluxZ,
-                                             Real &energyStarFlux,
-                                             Real &magneticStarFluxY,
-                                             Real &magneticStarFluxZ)
-        {
-            // Check for and handle the degenerate case
-            if (fabs(density * (speedSide - velocityX)
-                             * (speedSide - speedM)
-                             - (magneticX * magneticX))
-                < totalPressureStar * _hlldInternal::_hlldSmallNumber)
-            {
-                velocityStarY = velocityY;
-                velocityStarZ = velocityZ;
-                magneticStarY = magneticY;
-                magneticStarZ = magneticZ;
-            }
-            else
-            {
-                Real const denom = density * (speedSide - velocityX)
-                                           * (speedSide - speedM)
-                                           - (magneticX * magneticX);
-
-                // Compute the velocity and magnetic field in the star state
-                Real coef     = magneticX  * (speedM - velocityX) / denom;
-                velocityStarY = velocityY - magneticY * coef;
-                velocityStarZ = velocityZ - magneticZ * coef;
-
-                Real tmpPower = (speedSide - velocityX);
-                tmpPower = tmpPower * tmpPower;
-                coef = (density * tmpPower - (magneticX * magneticX)) / denom;
-                magneticStarY = magneticY * coef;
-                magneticStarZ = magneticZ * coef;
-            }
-
-            energyStar = ( energy * (speedSide - velocityX)
-                        - totalPressure * velocityX
-                        + totalPressureStar * speedM
-                        + magneticX * (_hlldInternal::_dotProduct(velocityX, velocityY, velocityZ, magneticX, magneticY, magneticZ)
-                                     - _hlldInternal::_dotProduct(speedM, velocityStarY, velocityStarZ, magneticX, magneticStarY, magneticStarZ)))
-                        / (speedSide - speedM);
-
-            // Now compute the star state fluxes
-            densityStarFlux   = densityFlux   + speedSide * (densityStar - density);;
-            momentumStarFluxX = momentumFluxX + speedSide * (densityStar * speedM - momentumX);;
-            momentumStarFluxY = momentumFluxY + speedSide * (densityStar * velocityStarY - momentumY);;
-            momentumStarFluxZ = momentumFluxZ + speedSide * (densityStar * velocityStarZ - momentumZ);;
-            energyStarFlux    = energyFlux    + speedSide * (energyStar  - energy);
-            magneticStarFluxY = magneticFluxY + speedSide * (magneticStarY - magneticY);
-            magneticStarFluxZ = magneticFluxZ + speedSide * (magneticStarZ - magneticZ);
-        }
-        // =====================================================================
-
-        // =====================================================================
-        __device__ __host__ void _doubleStarState(Real const &speedM,
-                                                  Real const &magneticX,
-                                                  Real const &totalPressureStar,
-                                                  Real const &densityStarL,
-                                                  Real const &velocityStarYL,
-                                                  Real const &velocityStarZL,
-                                                  Real const &energyStarL,
-                                                  Real const &magneticStarYL,
-                                                  Real const &magneticStarZL,
-                                                  Real const &densityStarR,
-                                                  Real const &velocityStarYR,
-                                                  Real const &velocityStarZR,
-                                                  Real const &energyStarR,
-                                                  Real const &magneticStarYR,
-                                                  Real const &magneticStarZR,
-                                                  Real &velocityDoubleStarY,
-                                                  Real &velocityDoubleStarZ,
-                                                  Real &magneticDoubleStarY,
-                                                  Real &magneticDoubleStarZ,
-                                                  Real &energyDoubleStarL,
-                                                  Real &energyDoubleStarR)
-        {
-            // if Bx is zero then just return the star state
-            if (magneticX < _hlldInternal::_hlldSmallNumber * totalPressureStar)
-            {
-                velocityDoubleStarY = velocityStarYL;
-                velocityDoubleStarZ = velocityStarZL;
-                magneticDoubleStarY = magneticStarYL;
-                magneticDoubleStarZ = magneticStarZL;
-                energyDoubleStarL    = energyStarL;
-                energyDoubleStarR    = energyStarR;
-            }
-            else
-            {
-                // Setup some variables we'll need later
-                Real sqrtDL = sqrt(densityStarL);
-                Real sqrtDR = sqrt(densityStarR);
-                Real inverseDensities = 1.0 / (sqrtDL + sqrtDR);
-                Real magXSign = copysign(1.0, magneticX);
-
-                // All we need to do now is compute the transverse velocities
-                // and magnetic fields along with the energy
-
-                // Double Star velocities
-                velocityDoubleStarY = inverseDensities * (sqrtDL * velocityStarYL
-                                      + sqrtDR * velocityStarYR
-                                      + magXSign * (magneticStarYR - magneticStarYL));
-                velocityDoubleStarZ = inverseDensities * (sqrtDL * velocityStarZL
-                                      + sqrtDR * velocityStarZR
-                                      + magXSign * (magneticStarZR - magneticStarZL));
-
-                // Double star magnetic fields
-                magneticDoubleStarY = inverseDensities * (sqrtDL * magneticStarYR
-                                      + sqrtDR * magneticStarYL
-                                      + magXSign * (sqrtDL * sqrtDR) * (velocityStarYR - velocityStarYL));
-                magneticDoubleStarZ = inverseDensities * (sqrtDL * magneticStarZR
-                                      + sqrtDR * magneticStarZL
-                                      + magXSign * (sqrtDL * sqrtDR) * (velocityStarZR - velocityStarZL));
-
-                // Double star energy
-                Real velDblStarDotMagDblStar = _hlldInternal::_dotProduct(speedM,
-                                                                          velocityDoubleStarY,
-                                                                          velocityDoubleStarZ,
-                                                                          magneticX,
-                                                                          magneticDoubleStarY,
-                                                                          magneticDoubleStarZ);
-                energyDoubleStarL = energyStarL - sqrtDL * magXSign
-                    * (_hlldInternal::_dotProduct(speedM, velocityStarYL, velocityStarZL, magneticX, magneticStarYL, magneticStarZL)
-                    - velDblStarDotMagDblStar);
-                energyDoubleStarR = energyStarR + sqrtDR * magXSign
-                    * (_hlldInternal::_dotProduct(speedM, velocityStarYR, velocityStarZR, magneticX, magneticStarYR, magneticStarZR)
-                    - velDblStarDotMagDblStar);
-            }
-        }
-        // =====================================================================
-
-        // =====================================================================
-        __device__ __host__ void _doubleStarFluxes(Real const &speedStarSide,
-                                                   Real const &momentumStarFluxX,
-                                                   Real const &momentumStarFluxY,
-                                                   Real const &momentumStarFluxZ,
-                                                   Real const &energyStarFlux,
-                                                   Real const &magneticStarFluxY,
-                                                   Real const &magneticStarFluxZ,
-                                                   Real const &densityStar,
-                                                   Real const &velocityStarX,
-                                                   Real const &velocityStarY,
-                                                   Real const &velocityStarZ,
-                                                   Real const &energyStar,
-                                                   Real const &magneticStarY,
-                                                   Real const &magneticStarZ,
-                                                   Real const &velocityDoubleStarX,
-                                                   Real const &velocityDoubleStarY,
-                                                   Real const &velocityDoubleStarZ,
-                                                   Real const &energyDoubleStar,
-                                                   Real const &magneticDoubleStarY,
-                                                   Real const &magneticDoubleStarZ,
-                                                   Real &momentumDoubleStarFluxX,
-                                                   Real &momentumDoubleStarFluxY,
-                                                   Real &momentumDoubleStarFluxZ,
-                                                   Real &energyDoubleStarFlux,
-                                                   Real &magneticDoubleStarFluxY,
-                                                   Real &magneticDoubleStarFluxZ)
-        {
-            momentumDoubleStarFluxX = momentumStarFluxX + speedStarSide * (velocityDoubleStarX - velocityStarX) * densityStar;
-            momentumDoubleStarFluxY = momentumStarFluxY + speedStarSide * (velocityDoubleStarY - velocityStarY) * densityStar;
-            momentumDoubleStarFluxZ = momentumStarFluxZ + speedStarSide * (velocityDoubleStarZ - velocityStarZ) * densityStar;
-            energyDoubleStarFlux    = energyStarFlux    + speedStarSide * (energyDoubleStar    - energyStar);
-            magneticDoubleStarFluxY = magneticStarFluxY + speedStarSide * (magneticDoubleStarY - magneticStarY);
-            magneticDoubleStarFluxZ = magneticStarFluxZ + speedStarSide * (magneticDoubleStarZ - magneticStarZ);
-        }
-        // =====================================================================
-
-    } // _hlldInternal namespace
-
-
-#endif // CUDA
\ No newline at end of file
+#ifdef DE  // PRESSURE_DE
+  #include "../utils/hydro_utilities.h"
+#endif  // DE
+
+#ifdef MHD
+namespace mhd
+{
+// =========================================================================
+__global__ void Calculate_HLLD_Fluxes_CUDA(Real const *dev_bounds_L, Real const *dev_bounds_R,
+                                           Real const *dev_magnetic_face, Real *dev_flux, int const n_cells,
+                                           Real const gamma, int const direction, int const n_fields)
+{
+  // get a thread index
+  int const threadId = threadIdx.x + blockIdx.x * blockDim.x;
+
+  // Thread guard to avoid overrun
+  if (threadId >= n_cells) {
+    return;
+  }
+
+  // Offsets & indices
+  int o1, o2, o3;
+  switch (direction) {
+    case 0:
+      o1 = grid_enum::momentum_x;
+      o2 = grid_enum::momentum_y;
+      o3 = grid_enum::momentum_z;
+      break;
+    case 1:
+      o1 = grid_enum::momentum_y;
+      o2 = grid_enum::momentum_z;
+      o3 = grid_enum::momentum_x;
+      break;
+    case 2:
+      o1 = grid_enum::momentum_z;
+      o2 = grid_enum::momentum_x;
+      o3 = grid_enum::momentum_y;
+      break;
+  }
+
+  // ============================
+  // Retrieve state variables
+  // ============================
+  // The magnetic field in the X-direction
+  Real const magneticX = dev_magnetic_face[threadId];
+
+  mhd::internal::State const stateL =
+      mhd::internal::loadState(dev_bounds_L, magneticX, gamma, threadId, n_cells, o1, o2, o3);
+  mhd::internal::State const stateR =
+      mhd::internal::loadState(dev_bounds_R, magneticX, gamma, threadId, n_cells, o1, o2, o3);
+
+  // Compute the approximate Left and Right wave speeds
+  mhd::internal::Speeds speed = mhd::internal::approximateLRWaveSpeeds(stateL, stateR, magneticX, gamma);
+
+  // =================================================================
+  // Compute the fluxes in the non-star states
+  // =================================================================
+  // Left state
+  mhd::internal::Flux fluxL = mhd::internal::nonStarFluxes(stateL, magneticX);
+
+  // If we're in the L state then assign fluxes and return.
+  // In this state the flow is supersonic
+  // M&K 2005 equation 66
+  if (speed.L > 0.0) {
+    mhd::internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxL, stateL);
+    return;
+  }
+  // Right state
+  mhd::internal::Flux fluxR = mhd::internal::nonStarFluxes(stateR, magneticX);
+
+  // If we're in the R state then assign fluxes and return.
+  // In this state the flow is supersonic
+  // M&K 2005 equation 66
+  if (speed.R < 0.0) {
+    mhd::internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxR, stateR);
+    return;
+  }
+
+  // =================================================================
+  // Compute the fluxes in the star states
+  // =================================================================
+  // Shared quantities:
+  // - velocityStarX = speedM
+  // - totalPrssureStar is the same on both sides
+  speed.M                      = approximateMiddleWaveSpeed(stateL, stateR, speed);
+  Real const totalPressureStar = mhd::internal::starTotalPressure(stateL, stateR, speed);
+
+  // Left star state
+  mhd::internal::StarState const starStateL =
+      mhd::internal::computeStarState(stateL, speed, speed.L, magneticX, totalPressureStar);
+
+  // Left star speed
+  speed.LStar = mhd::internal::approximateStarWaveSpeed(starStateL, speed, magneticX, -1);
+
+  // If we're in the L* state then assign fluxes and return.
+  // In this state the flow is subsonic
+  // M&K 2005 equation 66
+  if (speed.LStar > 0.0 and speed.L <= 0.0) {
+    fluxL = mhd::internal::starFluxes(starStateL, stateL, fluxL, speed, speed.L);
+    mhd::internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxL, stateL);
+    return;
+  }
+
+  // Right star state
+  mhd::internal::StarState const starStateR =
+      mhd::internal::computeStarState(stateR, speed, speed.R, magneticX, totalPressureStar);
+
+  // Right star speed
+  speed.RStar = mhd::internal::approximateStarWaveSpeed(starStateR, speed, magneticX, 1);
+
+  // If we're in the R* state then assign fluxes and return.
+  // In this state the flow is subsonic
+  // M&K 2005 equation 66
+  if (speed.RStar <= 0.0 and speed.R >= 0.0) {
+    fluxR = mhd::internal::starFluxes(starStateR, stateR, fluxR, speed, speed.R);
+    mhd::internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxR, stateR);
+    return;
+  }
+
+  // =================================================================
+  // Compute the fluxes in the double star states
+  // =================================================================
+  mhd::internal::DoubleStarState const doubleStarState =
+      mhd::internal::computeDoubleStarState(starStateL, starStateR, magneticX, totalPressureStar, speed);
+
+  // Compute and return L** fluxes
+  // M&K 2005 equation 66
+  if (speed.M > 0.0 and speed.LStar <= 0.0) {
+    fluxL = mhd::internal::computeDoubleStarFluxes(doubleStarState, doubleStarState.energyL, starStateL, stateL, fluxL,
+                                                   speed, speed.L, speed.LStar);
+    mhd::internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxL, stateL);
+    return;
+  }
+  // Compute and return R** fluxes
+  // M&K 2005 equation 66
+  if (speed.RStar > 0.0 and speed.M <= 0.0) {
+    fluxR = mhd::internal::computeDoubleStarFluxes(doubleStarState, doubleStarState.energyR, starStateR, stateR, fluxR,
+                                                   speed, speed.R, speed.RStar);
+    mhd::internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxR, stateR);
+    return;
+  }
+}
+// =========================================================================
+
+namespace internal
+{
+// =====================================================================
+__device__ __host__ mhd::internal::State loadState(Real const *interfaceArr, Real const &magneticX, Real const &gamma,
+                                                   int const &threadId, int const &n_cells, int const &o1,
+                                                   int const &o2, int const &o3)
+{
+  mhd::internal::State state;
+  state.density   = interfaceArr[threadId + n_cells * grid_enum::density];
+  state.density   = fmax(state.density, (Real)TINY_NUMBER);
+  state.velocityX = interfaceArr[threadId + n_cells * o1] / state.density;
+  state.velocityY = interfaceArr[threadId + n_cells * o2] / state.density;
+  state.velocityZ = interfaceArr[threadId + n_cells * o3] / state.density;
+  state.energy    = interfaceArr[threadId + n_cells * grid_enum::Energy];
+  state.energy    = fmax(state.energy, (Real)TINY_NUMBER);
+  state.magneticY = interfaceArr[threadId + n_cells * grid_enum::Q_x_magnetic_y];
+  state.magneticZ = interfaceArr[threadId + n_cells * grid_enum::Q_x_magnetic_z];
+
+  #ifdef SCALAR
+  for (int i = 0; i < NSCALARS; i++) {
+    state.scalarSpecific[i] = interfaceArr[threadId + n_cells * (grid_enum::scalar + i)] / state.density;
+  }
+  #endif  // SCALAR
+  #ifdef DE
+  state.thermalEnergySpecific = interfaceArr[threadId + n_cells * grid_enum::GasEnergy] / state.density;
+
+  Real energyNonThermal = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(state.density, state.velocityX,
+                                                                             state.velocityY, state.velocityZ) +
+                          mhd::utils::computeMagneticEnergy(magneticX, state.magneticY, state.magneticZ);
+
+  state.gasPressure = fmax(hydro_utilities::Get_Pressure_From_DE(state.energy, state.energy - energyNonThermal,
+                                                                 state.thermalEnergySpecific * state.density, gamma),
+                           (Real)TINY_NUMBER);
+  #else
+  // Note that this function does the positive pressure check
+  // internally
+  state.gasPressure = mhd::internal::Calc_Pressure_Primitive(state, magneticX, gamma);
+  #endif  // DE
+
+  state.totalPressure =
+      mhd::utils::computeTotalPressure(state.gasPressure, magneticX, state.magneticY, state.magneticZ);
+
+  return state;
+}
+// =====================================================================
+
+// =====================================================================
+__device__ __host__ mhd::internal::Speeds approximateLRWaveSpeeds(mhd::internal::State const &stateL,
+                                                                  mhd::internal::State const &stateR,
+                                                                  Real const &magneticX, Real const &gamma)
+{
+  // Get the fast magnetosonic wave speeds
+  Real magSonicL = mhd::utils::fastMagnetosonicSpeed(stateL.density, stateL.gasPressure, magneticX, stateL.magneticY,
+                                                     stateL.magneticZ, gamma);
+  Real magSonicR = mhd::utils::fastMagnetosonicSpeed(stateR.density, stateR.gasPressure, magneticX, stateR.magneticY,
+                                                     stateR.magneticZ, gamma);
+
+  // Compute the S_L and S_R wave speeds.
+  // Version suggested by Miyoshi & Kusano 2005 and used in Athena
+  // M&K 2005 equation 67
+  Real magSonicMax = fmax(magSonicL, magSonicR);
+  mhd::internal::Speeds speed;
+  speed.L = fmin(stateL.velocityX, stateR.velocityX) - magSonicMax;
+  speed.R = fmax(stateL.velocityX, stateR.velocityX) + magSonicMax;
+
+  return speed;
+}
+// =====================================================================
+
+// =====================================================================
+__device__ __host__ Real approximateMiddleWaveSpeed(mhd::internal::State const &stateL,
+                                                    mhd::internal::State const &stateR,
+                                                    mhd::internal::Speeds const &speed)
+{
+  // Compute the S_M wave speed
+  // M&K 2005 equation 38
+  Real const speed_r_diff = speed.R - stateR.velocityX;
+  Real const speed_l_diff = speed.L - stateL.velocityX;
+
+  return  // Numerator
+      (speed_r_diff * stateR.density * stateR.velocityX - speed_l_diff * stateL.density * stateL.velocityX -
+       stateR.totalPressure + stateL.totalPressure) /
+      // Denominator
+      (speed_r_diff * stateR.density - speed_l_diff * stateL.density);
+}
+// =====================================================================
+
+// =====================================================================
+__device__ __host__ Real approximateStarWaveSpeed(mhd::internal::StarState const &starState,
+                                                  mhd::internal::Speeds const &speed, Real const &magneticX,
+                                                  Real const &side)
+{
+  // Compute the S_L^* and S_R^* wave speeds
+  // M&K 2005 equation 51
+  return speed.M + side * mhd::utils::alfvenSpeed(magneticX, starState.density);
+}
+// =====================================================================
+
+// =====================================================================
+__device__ __host__ mhd::internal::Flux nonStarFluxes(mhd::internal::State const &state, Real const &magneticX)
+{
+  mhd::internal::Flux flux;
+  // M&K 2005 equation 2
+  flux.density = state.density * state.velocityX;
+
+  flux.momentumX = flux.density * state.velocityX + state.totalPressure - magneticX * magneticX;
+  flux.momentumY = flux.density * state.velocityY - magneticX * state.magneticY;
+  flux.momentumZ = flux.density * state.velocityZ - magneticX * state.magneticZ;
+
+  flux.magneticY = state.magneticY * state.velocityX - magneticX * state.velocityY;
+  flux.magneticZ = state.magneticZ * state.velocityX - magneticX * state.velocityZ;
+
+  // Group transverse terms for FP associative symmetry
+  flux.energy = state.velocityX * (state.energy + state.totalPressure) -
+                magneticX * (state.velocityX * magneticX +
+                             ((state.velocityY * state.magneticY) + (state.velocityZ * state.magneticZ)));
+
+  return flux;
+}
+// =====================================================================
+
+// =====================================================================
+__device__ __host__ void returnFluxes(int const &threadId, int const &o1, int const &o2, int const &o3,
+                                      int const &n_cells, Real *dev_flux, mhd::internal::Flux const &flux,
+                                      mhd::internal::State const &state)
+{
+  // Note that the direction of the grid_enum::fluxX_magnetic_DIR is the
+  // direction of the electric field that the magnetic flux is, not the magnetic
+  // flux
+  dev_flux[threadId + n_cells * grid_enum::density]          = flux.density;
+  dev_flux[threadId + n_cells * o1]                          = flux.momentumX;
+  dev_flux[threadId + n_cells * o2]                          = flux.momentumY;
+  dev_flux[threadId + n_cells * o3]                          = flux.momentumZ;
+  dev_flux[threadId + n_cells * grid_enum::Energy]           = flux.energy;
+  dev_flux[threadId + n_cells * grid_enum::fluxX_magnetic_z] = flux.magneticY;
+  dev_flux[threadId + n_cells * grid_enum::fluxX_magnetic_y] = flux.magneticZ;
+
+  #ifdef SCALAR
+  for (int i = 0; i < NSCALARS; i++) {
+    dev_flux[threadId + n_cells * (grid_enum::scalar + i)] = state.scalarSpecific[i] * flux.density;
+  }
+  #endif  // SCALAR
+  #ifdef DE
+  dev_flux[threadId + n_cells * grid_enum::GasEnergy] = state.thermalEnergySpecific * flux.density;
+  #endif  // DE
+}
+// =====================================================================
+
+// =====================================================================
+__device__ __host__ Real starTotalPressure(mhd::internal::State const &stateL, mhd::internal::State const &stateR,
+                                           mhd::internal::Speeds const &speed)
+{
+  // M&K 2005 equation 41
+  return  // Numerator
+      (stateR.density * stateL.totalPressure * (speed.R - stateR.velocityX) -
+       stateL.density * stateR.totalPressure * (speed.L - stateL.velocityX) +
+       stateL.density * stateR.density * (speed.R - stateR.velocityX) * (speed.L - stateL.velocityX) *
+           (stateR.velocityX - stateL.velocityX)) /
+      // Denominator
+      (stateR.density * (speed.R - stateR.velocityX) - stateL.density * (speed.L - stateL.velocityX));
+}
+// =====================================================================
+
+// =====================================================================
+__device__ __host__ mhd::internal::StarState computeStarState(mhd::internal::State const &state,
+                                                              mhd::internal::Speeds const &speed, Real const &speedSide,
+                                                              Real const &magneticX, Real const &totalPressureStar)
+{
+  mhd::internal::StarState starState;
+
+  // Compute the densities in the star state
+  // M&K 2005 equation 43
+  starState.density = state.density * (speedSide - state.velocityX) / (speedSide - speed.M);
+
+  // Check for and handle the degenerate case
+  // Explained at the top of page 326 in M&K 2005
+  if (fabs(state.density * (speedSide - state.velocityX) * (speedSide - speed.M) - (magneticX * magneticX)) <
+      totalPressureStar * mhd::internal::_hlldSmallNumber) {
+    starState.velocityY = state.velocityY;
+    starState.velocityZ = state.velocityZ;
+    starState.magneticY = state.magneticY;
+    starState.magneticZ = state.magneticZ;
+  } else {
+    // Denominator for M&K 2005 equations 44-47
+    Real const denom = state.density * (speedSide - state.velocityX) * (speedSide - speed.M) - (magneticX * magneticX);
+
+    // Compute the velocity and magnetic field in the star state
+    // M&K 2005 equations 44 & 46
+    Real coef           = magneticX * (speed.M - state.velocityX) / denom;
+    starState.velocityY = state.velocityY - state.magneticY * coef;
+    starState.velocityZ = state.velocityZ - state.magneticZ * coef;
+
+    // M&K 2005 equations 45 & 47
+    Real tmpPower       = (speedSide - state.velocityX);
+    tmpPower            = tmpPower * tmpPower;
+    coef                = (state.density * tmpPower - (magneticX * magneticX)) / denom;
+    starState.magneticY = state.magneticY * coef;
+    starState.magneticZ = state.magneticZ * coef;
+  }
+
+  // M&K 2005 equation 48
+  starState.energy = (state.energy * (speedSide - state.velocityX) - state.totalPressure * state.velocityX +
+                      totalPressureStar * speed.M +
+                      magneticX * (math_utils::dotProduct(state.velocityX, state.velocityY, state.velocityZ, magneticX,
+                                                          state.magneticY, state.magneticZ) -
+                                   math_utils::dotProduct(speed.M, starState.velocityY, starState.velocityZ, magneticX,
+                                                          starState.magneticY, starState.magneticZ))) /
+                     (speedSide - speed.M);
+
+  return starState;
+}
+// =====================================================================
+
+// =====================================================================
+__device__ __host__ mhd::internal::Flux starFluxes(mhd::internal::StarState const &starState,
+                                                   mhd::internal::State const &state, mhd::internal::Flux const &flux,
+                                                   mhd::internal::Speeds const &speed, Real const &speedSide)
+{
+  mhd::internal::Flux starFlux;
+
+  // Now compute the star state fluxes
+  // M&K 2005 equations 64
+  starFlux.density   = flux.density + speedSide * (starState.density - state.density);
+  starFlux.momentumX = flux.momentumX + speedSide * (starState.density * speed.M - state.density * state.velocityX);
+  starFlux.momentumY =
+      flux.momentumY + speedSide * (starState.density * starState.velocityY - state.density * state.velocityY);
+  starFlux.momentumZ =
+      flux.momentumZ + speedSide * (starState.density * starState.velocityZ - state.density * state.velocityZ);
+  starFlux.energy    = flux.energy + speedSide * (starState.energy - state.energy);
+  starFlux.magneticY = flux.magneticY + speedSide * (starState.magneticY - state.magneticY);
+  starFlux.magneticZ = flux.magneticZ + speedSide * (starState.magneticZ - state.magneticZ);
+
+  return starFlux;
+}
+// =====================================================================
+
+// =====================================================================
+__device__ __host__ mhd::internal::DoubleStarState computeDoubleStarState(mhd::internal::StarState const &starStateL,
+                                                                          mhd::internal::StarState const &starStateR,
+                                                                          Real const &magneticX,
+                                                                          Real const &totalPressureStar,
+                                                                          mhd::internal::Speeds const &speed)
+{
+  mhd::internal::DoubleStarState doubleStarState;
+
+  // if Bx is zero then just return the star state
+  // Explained at the top of page 328 in M&K 2005. Essentially when
+  // magneticX is 0 this reduces to the HLLC solver
+  if (0.5 * (magneticX * magneticX) < mhd::internal::_hlldSmallNumber * totalPressureStar) {
+    if (speed.M >= 0.0) {
+      // We're in the L** state but Bx=0 so return L* state
+      doubleStarState.velocityY = starStateL.velocityY;
+      doubleStarState.velocityZ = starStateL.velocityZ;
+      doubleStarState.magneticY = starStateL.magneticY;
+      doubleStarState.magneticZ = starStateL.magneticZ;
+      doubleStarState.energyL   = starStateL.energy;
+    } else {
+      // We're in the L** state but Bx=0 so return L* state
+      doubleStarState.velocityY = starStateR.velocityY;
+      doubleStarState.velocityZ = starStateR.velocityZ;
+      doubleStarState.magneticY = starStateR.magneticY;
+      doubleStarState.magneticZ = starStateR.magneticZ;
+      doubleStarState.energyR   = starStateR.energy;
+    }
+  } else {
+    // Setup some variables we'll need later
+    Real sqrtDL           = sqrt(starStateL.density);
+    Real sqrtDR           = sqrt(starStateR.density);
+    Real inverseDensities = 1.0 / (sqrtDL + sqrtDR);
+    Real magXSign         = copysign(1.0, magneticX);
+
+    // All we need to do now is compute the transverse velocities
+    // and magnetic fields along with the energy
+
+    // Double Star velocities
+    // M&K 2005 equations 59 & 60
+    doubleStarState.velocityY = inverseDensities * (sqrtDL * starStateL.velocityY + sqrtDR * starStateR.velocityY +
+                                                    magXSign * (starStateR.magneticY - starStateL.magneticY));
+    doubleStarState.velocityZ = inverseDensities * (sqrtDL * starStateL.velocityZ + sqrtDR * starStateR.velocityZ +
+                                                    magXSign * (starStateR.magneticZ - starStateL.magneticZ));
+
+    // Double star magnetic fields
+    // M&K 2005 equations 61 & 62
+    doubleStarState.magneticY =
+        inverseDensities * (sqrtDL * starStateR.magneticY + sqrtDR * starStateL.magneticY +
+                            magXSign * (sqrtDL * sqrtDR) * (starStateR.velocityY - starStateL.velocityY));
+    doubleStarState.magneticZ =
+        inverseDensities * (sqrtDL * starStateR.magneticZ + sqrtDR * starStateL.magneticZ +
+                            magXSign * (sqrtDL * sqrtDR) * (starStateR.velocityZ - starStateL.velocityZ));
+
+    // Double star energy
+    Real velDblStarDotMagDblStar =
+        math_utils::dotProduct(speed.M, doubleStarState.velocityY, doubleStarState.velocityZ, magneticX,
+                               doubleStarState.magneticY, doubleStarState.magneticZ);
+    // M&K 2005 equation 63
+    doubleStarState.energyL =
+        starStateL.energy - sqrtDL * magXSign *
+                                (math_utils::dotProduct(speed.M, starStateL.velocityY, starStateL.velocityZ, magneticX,
+                                                        starStateL.magneticY, starStateL.magneticZ) -
+                                 velDblStarDotMagDblStar);
+    doubleStarState.energyR =
+        starStateR.energy + sqrtDR * magXSign *
+                                (math_utils::dotProduct(speed.M, starStateR.velocityY, starStateR.velocityZ, magneticX,
+                                                        starStateR.magneticY, starStateR.magneticZ) -
+                                 velDblStarDotMagDblStar);
+  }
+
+  return doubleStarState;
+}
+// =====================================================================
+
+// =====================================================================
+__device__ __host__ mhd::internal::Flux computeDoubleStarFluxes(
+    mhd::internal::DoubleStarState const &doubleStarState, Real const &doubleStarStateEnergy,
+    mhd::internal::StarState const &starState, mhd::internal::State const &state, mhd::internal::Flux const &flux,
+    mhd::internal::Speeds const &speed, Real const &speedSide, Real const &speedSideStar)
+{
+  mhd::internal::Flux doubleStarFlux;
+
+  Real const speed_diff = speedSideStar - speedSide;
+
+  // M&K 2005 equation 65
+  doubleStarFlux.density =
+      flux.density - speedSide * state.density - speed_diff * starState.density + speedSideStar * starState.density;
+
+  doubleStarFlux.momentumX = flux.momentumX - speedSide * (state.density * state.velocityX) -
+                             speed_diff * (starState.density * speed.M) + speedSideStar * (starState.density * speed.M);
+  doubleStarFlux.momentumY = flux.momentumY - speedSide * (state.density * state.velocityY) -
+                             speed_diff * (starState.density * starState.velocityY) +
+                             speedSideStar * (starState.density * doubleStarState.velocityY);
+  doubleStarFlux.momentumZ = flux.momentumZ - speedSide * (state.density * state.velocityZ) -
+                             speed_diff * (starState.density * starState.velocityZ) +
+                             speedSideStar * (starState.density * doubleStarState.velocityZ);
+  doubleStarFlux.energy =
+      flux.energy - speedSide * state.energy - speed_diff * starState.energy + speedSideStar * doubleStarStateEnergy;
+  doubleStarFlux.magneticY = flux.magneticY - speedSide * state.magneticY - speed_diff * starState.magneticY +
+                             speedSideStar * doubleStarState.magneticY;
+  doubleStarFlux.magneticZ = flux.magneticZ - speedSide * state.magneticZ - speed_diff * starState.magneticZ +
+                             speedSideStar * doubleStarState.magneticZ;
+
+  return doubleStarFlux;
+}
+// =====================================================================
+
+}  // namespace internal
+}  // end namespace mhd
+#endif  // MHD
diff --git a/src/riemann_solvers/hlld_cuda.h b/src/riemann_solvers/hlld_cuda.h
index d8d58dce1..8c547e889 100644
--- a/src/riemann_solvers/hlld_cuda.h
+++ b/src/riemann_solvers/hlld_cuda.h
@@ -1,7 +1,9 @@
 /*!
  * \file hlld_cuda.cu
  * \author Robert 'Bob' Caddy (rvc@pitt.edu)
- * \brief Contains the declaration of the HLLD solver
+ * \brief Contains the declaration of the HLLD solver from Miyoshi & Kusano 2005
+ * "A multi-state HLL approximate Riemann solver for ideal
+ * magnetohydrodynamics", hereafter referred to as M&K 2005
  *
  */
 
@@ -11,385 +13,257 @@
 
 // Local Includes
 #include "../global/global.h"
+#include "../utils/hydro_utilities.h"
 
-#ifdef CUDA
+/*!
+ * \brief Namespace for MHD code
+ *
+ */
+namespace mhd
+{
+/*!
+ * \brief Compute the HLLD fluxes from Miyoshi & Kusano 2005
+ *
+ * \param[in]  dev_bounds_L The interface states on the left side of the
+ * interface
+ * \param[in]  dev_bounds_R The interface states on the right side of
+ * the interface
+ * \param[in]  dev_magnetic_face A pointer to the begining of the
+ * conserved magnetic field array that is stored at the interface. I.e. for the
+ * X-direction solve this would be the begining of the X-direction fields
+ * \param[out] dev_flux The output flux
+ * \param[in]  n_cells Total number of cells
+ * \param[in]  n_ghost Number of ghost cells on each side
+ * \param[in]  dir The direction that the solve is taking place in. 0=X, 1=Y,
+ * 2=Z
+ * \param[in]  n_fields The total number of fields
+ */
+__global__ void Calculate_HLLD_Fluxes_CUDA(Real const *dev_bounds_L, Real const *dev_bounds_R,
+                                           Real const *dev_magnetic_face, Real *dev_flux, int const n_cells,
+                                           Real const gamma, int const direction, int const n_fields);
+
+/*!
+ * \brief Namespace to hold private functions used within the HLLD
+ * solver
+ *
+ */
+namespace internal
+{
+/*!
+ * \brief Used for some comparisons. Value was chosen to match what is
+ * used in Athena
+ */
+Real static const _hlldSmallNumber = 1.0e-8;
+
+/*!
+ * \brief Holds all the data needed for the non-star states of the HLLD solver
+ *
+ */
+struct State {
+  Real density, velocityX, velocityY, velocityZ, energy, magneticY, magneticZ, gasPressure, totalPressure;
+#ifdef SCALAR
+  Real scalarSpecific[grid_enum::nscalars];
+#endif  // SCALAR
+#ifdef DE
+  Real thermalEnergySpecific;
+#endif  // DE
+};
+
+/*!
+ * \brief Holds all the data needed for the star states of the HLLD solver
+ * except total pressure and x velocity as those are shared between the left and
+ * right states
+ *
+ */
+struct StarState {
+  // velocityStarX = Speeds.M
+  // Total pressure is computed on its own since it's shared
+  Real density, velocityY, velocityZ, energy, magneticY, magneticZ;
+};
+
+/*!
+ * \brief Holds all the data needed for the double star states of the HLLD
+ * solver except the x velocity, density, and total pressure since those are all
+ * inherited from the star state.
+ *
+ */
+struct DoubleStarState {
+  // velocityDoubleStarX = Speeds.M
+  // densityDoubleStar = densityStar
+  // pressureDoubleStar = pressureStar
+  // Shared values
+  Real velocityY, velocityZ, magneticY, magneticZ;
+  // Different values. Initializing these since one or the other can be uninitializing leading to bad tests
+  Real energyL = 0.0, energyR = 0.0;
+};
+
+/*!
+ * \brief Holds all the data needed for the fluxes in the HLLD solver
+ *
+ */
+struct Flux {
+  Real density, momentumX, momentumY, momentumZ, energy, magneticY, magneticZ;
+};
+
+/*!
+ * \brief Holds all the data needed for the speeds in the HLLD solver
+ *
+ */
+struct Speeds {
+  Real L, LStar, M, RStar, R;
+};
+
+/*!
+ * \brief Load and compute the left or right state
+ *
+ * \param interfaceArr The interface array to load from
+ * \param magneticX The X magnetic field
+ * \param gamma The adiabatic index
+ * \param threadId The thread ID
+ * \param n_cells Total number of cells
+ * \param o1 Direction parameter
+ * \param o2 Direction parameter
+ * \param o3 Direction parameter
+ * \return mhd::internal::State The loaded state
+ */
+__device__ __host__ mhd::internal::State loadState(Real const *interfaceArr, Real const &magneticX, Real const &gamma,
+                                                   int const &threadId, int const &n_cells, int const &o1,
+                                                   int const &o2, int const &o3);
 
-    /*!
-     * \brief Compute the HLLD fluxes from Miyoshi & Kusano 2005
-     *
-     * \param[in]  dev_bounds_L
-     * \param[in]  dev_bounds_R
-     * \param[out] dev_flux
-     * \param[in]  nx
-     * \param[in]  ny
-     * \param[in]  nz
-     * \param[in]  n_ghost
-     * \param[in]  gamma
-     * \param[in]  dir
-     * \param[in]  n_fields
-     */
-    __global__ void Calculate_HLLD_Fluxes_CUDA(Real *dev_bounds_L,
-                                               Real *dev_bounds_R,
-                                               Real *dev_flux,
-                                               int nx,
-                                               int ny,
-                                               int nz,
-                                               int n_ghost,
-                                               Real gamma,
-                                               int direction,
-                                               int n_fields);
+/*!
+ * \brief Compute the approximate left and right wave speeds. M&K 2005 equation
+ * 67
+ */
+__device__ __host__ mhd::internal::Speeds approximateLRWaveSpeeds(mhd::internal::State const &stateL,
+                                                                  mhd::internal::State const &stateR,
+                                                                  Real const &magneticX, Real const &gamma);
 
-    /*!
-     * \brief Namespace to hold private functions used within the HLLD
-     * solver
-     *
-     */
-    namespace _hlldInternal
-    {
-        /*!
-         * \brief Used for some comparisons. Value was chosen to match what is
-         * used in Athena
-         */
-        Real static const _hlldSmallNumber = 1.0e-8;
+/*!
+ * \brief Compute the approximate middle wave speed. M&K 2005 equation 38
+ */
+__device__ __host__ Real approximateMiddleWaveSpeed(mhd::internal::State const &stateL,
+                                                    mhd::internal::State const &stateR,
+                                                    mhd::internal::Speeds const &speed);
 
-        /*!
-         * \brief Compute the left, right, star, and middle wave speeds. Also
-         * returns the densities in the star states
-         *
-         * \param[in] densityL Density, left side
-         * \param[in] momentumXL Momentum in the X-direction, left side
-         * \param[in] momentumYL Momentum in the Y-direction, left side
-         * \param[in] momentumZL Momentum in the Z-direction, left side
-         * \param[in] velocityXL Velocity in the X-direction, left side
-         * \param[in] velocityYL Velocity in the Y-direction, left side
-         * \param[in] velocityZL Velocity in the Z-direction, left side
-         * \param[in] gasPressureL Gas pressure, left side
-         * \param[in] totalPressureL Total MHD pressure, left side
-         * \param[in] magneticXL Magnetic field in the X-direction, left side
-         * \param[in] magneticYL Magnetic field in the Y-direction, left side
-         * \param[in] magneticZL Magnetic field in the Z-direction, left side
-         * \param[in] densityR Density, right side
-         * \param[in] momentumXR Momentum in the X-direction, right side
-         * \param[in] momentumYR Momentum in the Y-direction, right side
-         * \param[in] momentumZR Momentum in the Z-direction, right side
-         * \param[in] velocityXR Velocity in the X-direction, right side
-         * \param[in] velocityYR Velocity in the Y-direction, right side
-         * \param[in] velocityZR Velocity in the Z-direction, right side
-         * \param[in] gasPressureR Gas pressure, right side
-         * \param[in] totalPressureR Total MHD pressure, right side
-         * \param[in] magneticXR Magnetic field in the X-direction, right side
-         * \param[in] magneticYR Magnetic field in the Y-direction, right side
-         * \param[in] magneticZR Magnetic field in the Z-direction, right side
-         * \param[in] gamma Adiabatic index
-         * \param[out] speedL Approximate speed of the left most wave
-         * \param[out] speedR Approximate speed of the right most wave
-         * \param[out] speedM Speed of the middle wave
-         * \param[out] speedStarL Speed of the left star state wave
-         * \param[out] speedStarR Speed of the right star state wave
-         * \param[out] densityStarL Density in left star region
-         * \param[out] densityStarR Density in right star region
-         */
-        __device__ __host__ void _approximateWaveSpeeds(Real const &densityL,
-                                                        Real const &momentumXL,
-                                                        Real const &momentumYL,
-                                                        Real const &momentumZL,
-                                                        Real const &velocityXL,
-                                                        Real const &velocityYL,
-                                                        Real const &velocityZL,
-                                                        Real const &gasPressureL,
-                                                        Real const &totalPressureL,
-                                                        Real const &magneticXL,
-                                                        Real const &magneticYL,
-                                                        Real const &magneticZL,
-                                                        Real const &densityR,
-                                                        Real const &momentumXR,
-                                                        Real const &momentumYR,
-                                                        Real const &momentumZR,
-                                                        Real const &velocityXR,
-                                                        Real const &velocityYR,
-                                                        Real const &velocityZR,
-                                                        Real const &gasPressureR,
-                                                        Real const &totalPressureR,
-                                                        Real const &magneticXR,
-                                                        Real const &magneticYR,
-                                                        Real const &magneticZR,
-                                                        Real const &gamma,
-                                                        Real &speedL,
-                                                        Real &speedR,
-                                                        Real &speedM,
-                                                        Real &speedStarL,
-                                                        Real &speedStarR,
-                                                        Real &densityStarL,
-                                                        Real &densityStarR);
+/*!
+ * \brief Compute the approximate left and right wave speeds. M&K 2005 equation
+ * 51
+ */
+__device__ __host__ Real approximateStarWaveSpeed(mhd::internal::StarState const &starState,
+                                                  mhd::internal::Speeds const &speed, Real const &magneticX,
+                                                  Real const &side);
 
-        /*!
-         * \brief Compute the fluxes in the left or right non-star state
-         *
-         * \param[in] momentumX Momentum in the X-direction
-         * \param[in] velocityX Velocity in the X-direction
-         * \param[in] velocityY Velocity in the Y-direction
-         * \param[in] velocityZ Velocity in the Z-direction
-         * \param[in] totalPressure Total MHD pressure
-         * \param[in] energy Energy
-         * \param[in] magneticX Magnetic field in -direction
-         * \param[in] magneticY Magnetic field in -direction
-         * \param[in] magneticZ Magnetic field in -direction
-         * \param[out] densityFlux The density flux
-         * \param[out] momentumFluxX The momentum flux in the X-direction
-         * \param[out] momentumFluxY The momentum flux in the Y-direction
-         * \param[out] momentumFluxZ The momentum flux in the Z-direction
-         * \param[out] magneticFluxY The magnetic field flux in the Y-direction
-         * \param[out] magneticFluxZ The magnetic field flux in the Z-direction
-         * \param[out] energyFlux The energy flux
-         */
-        __device__ __host__ void _nonStarFluxes(Real const &momentumX,
-                                                Real const &velocityX,
-                                                Real const &velocityY,
-                                                Real const &velocityZ,
-                                                Real const &totalPressure,
-                                                Real const &energy,
-                                                Real const &magneticX,
-                                                Real const &magneticY,
-                                                Real const &magneticZ,
-                                                Real &densityFlux,
-                                                Real &momentumFluxX,
-                                                Real &momentumFluxY,
-                                                Real &momentumFluxZ,
-                                                Real &magneticFluxY,
-                                                Real &magneticFluxZ,
-                                                Real &energyFlux);
+/*!
+ * \brief Compute the fluxes in the left or right non-star state. M&K 2005
+ * equation 2
+ *
+ * \param state The state to compute the flux of
+ * \param magneticX The X magnetic field
+ * \return mhd::internal::Flux The flux in the state
+ */
+__device__ __host__ mhd::internal::Flux nonStarFluxes(mhd::internal::State const &state, Real const &magneticX);
 
-        /*!
-         * \brief Assign the given flux values to the dev_flux array
-         *
-         * \param[in] threadId The thread ID
-         * \param[in] o1 Offset to get indexing right
-         * \param[in] o2 Offset to get indexing right
-         * \param[in] o3 Offset to get indexing right
-         * \param[in] n_cells Number of cells
-         * \param[out] dev_flux The flux array
-         * \param[in] densityFlux The density flux
-         * \param[in] momentumFluxX The momentum flux in the X-direction
-         * \param[in] momentumFluxY The momentum flux in the Y-direction
-         * \param[in] momentumFluxZ The momentum flux in the Z-direction
-         * \param[in] magneticFluxY The magnetic field flux in the X-direction
-         * \param[in] magneticFluxZ The magnetic field flux in the Y-direction
-         * \param[in] energyFlux The energy flux
-         */
-        __device__ __host__ void _returnFluxes(int const &threadId,
-                                               int const &o1,
-                                               int const &o2,
-                                               int const &o3,
-                                               int const &n_cells,
-                                               Real *dev_flux,
-                                               Real const &densityFlux,
-                                               Real const &momentumFluxX,
-                                               Real const &momentumFluxY,
-                                               Real const &momentumFluxZ,
-                                               Real const &magneticFluxY,
-                                               Real const &magneticFluxZ,
-                                               Real const &energyFlux);
+/*!
+ * \brief Write the given flux values to the dev_flux array
+ *
+ * \param[in] threadId The thread ID
+ * \param[in] o1 Offset to get indexing right
+ * \param[in] o2 Offset to get indexing right
+ * \param[in] o3 Offset to get indexing right
+ * \param[in] n_cells Number of cells
+ * \param[out] dev_flux The flux array
+ * \param[in] flux The fluxes to write out
+ * \param[in] state The left or right state depending on if this is a return for
+ * one of the left states or one of the right states
+ */
+__device__ __host__ void returnFluxes(int const &threadId, int const &o1, int const &o2, int const &o3,
+                                      int const &n_cells, Real *dev_flux, mhd::internal::Flux const &flux,
+                                      mhd::internal::State const &state);
 
-        /*!
-         * \brief Compute the fluxes in the left or right star state
-         *
-         * \param[in] speedM Speed of the central wave
-         * \param[in] speedSide Speed of the non-star wave on the side being computed
-         * \param[in] density Density
-         * \param[in] velocityX Velocity in the X-direction
-         * \param[in] velocityY Velocity in the Y-direction
-         * \param[in] velocityZ Velocity in the Z-direction
-         * \param[in] momentumX Momentum in the X-direction
-         * \param[in] momentumY Momentum in the Y-direction
-         * \param[in] momentumZ Momentum in the Z-direction
-         * \param[in] energy Energy
-         * \param[in] totalPressure Total MHD pressure
-         * \param[in] magneticX Magnetic field in the X-direction
-         * \param[in] magneticY Magnetic field in the Y-direction
-         * \param[in] magneticZ Magnetic field in the Z-direction
-         * \param[in] densityStar Density in the star state
-         * \param[in] totalPressureStar Total MHD pressure in the star state
-         * \param[in] densityFlux Density Flux from the non-star state
-         * \param[in] momentumFluxX Momentum flux from the non-star state in the X-direction
-         * \param[in] momentumFluxY Momentum flux from the non-star state in the Y-direction
-         * \param[in] momentumFluxZ Momentum flux from the non-star state in the Z-direction
-         * \param[in] energyFlux Energy flux from the non-star state
-         * \param[in] magneticFluxY Magnetic flux from the non-star state in the X-direction
-         * \param[in] magneticFluxZ Magnetic flux from the non-star state in the Y-direction
-         * \param[out] velocityStarY Velocity in the star state in the Y-direction
-         * \param[out] velocityStarZ Velocity in the star state in the Z-direction
-         * \param[out] energyStar Energy in the star state
-         * \param[out] magneticStarY Magnetic field in the star state in the X-direction
-         * \param[out] magneticStarZ Magnetic field in the star state in the Y-direction
-         * \param[out] densityStarFlux Density flux in the star state
-         * \param[out] momentumStarFluxX Momentum flux in the star state in the X-direction
-         * \param[out] momentumStarFluxY Momentum flux in the star state in the Y-direction
-         * \param[out] momentumStarFluxZ Momentum flux in the star state in the Z-direction
-         * \param[out] energyStarFlux Energy flux in the star state
-         * \param[out] magneticStarFluxY Magnetic field flux in the star state in the X-direction
-         * \param[out] magneticStarFluxZ Magnetic field flux in the star state in the Y-direction
-         *
-         */
-        __device__ __host__ void _starFluxes(Real const &speedM,
-                                             Real const &speedSide,
-                                             Real const &density,
-                                             Real const &velocityX,
-                                             Real const &velocityY,
-                                             Real const &velocityZ,
-                                             Real const &momentumX,
-                                             Real const &momentumY,
-                                             Real const &momentumZ,
-                                             Real const &energy,
-                                             Real const &totalPressure,
-                                             Real const &magneticX,
-                                             Real const &magneticY,
-                                             Real const &magneticZ,
-                                             Real const &densityStar,
-                                             Real const &totalPressureStar,
-                                             Real const &densityFlux,
-                                             Real const &momentumFluxX,
-                                             Real const &momentumFluxY,
-                                             Real const &momentumFluxZ,
-                                             Real const &energyFlux,
-                                             Real const &magneticFluxY,
-                                             Real const &magneticFluxZ,
-                                             Real &velocityStarY,
-                                             Real &velocityStarZ,
-                                             Real &energyStar,
-                                             Real &magneticStarY,
-                                             Real &magneticStarZ,
-                                             Real &densityStarFlux,
-                                             Real &momentumStarFluxX,
-                                             Real &momentumStarFluxY,
-                                             Real &momentumStarFluxZ,
-                                             Real &energyStarFlux,
-                                             Real &magneticStarFluxY,
-                                             Real &magneticStarFluxZ);
+/*!
+ * \brief Compute the total pressure in the star states. M&K 2005 equation 41
+ *
+ * \param stateL The left state
+ * \param stateR The right state
+ * \param speed The wave speeds
+ * \return Real The total pressure in the star state
+ */
+__device__ __host__ Real starTotalPressure(mhd::internal::State const &stateL, mhd::internal::State const &stateR,
+                                           mhd::internal::Speeds const &speed);
 
-        /*!
-         * \brief Compute the dot product of a and b.
-         *
-         * \param[in] a1 The first element of a
-         * \param[in] a2 The second element of a
-         * \param[in] a3 The third element of a
-         * \param[in] b1 The first element of b
-         * \param[in] b2 The second element of b
-         * \param[in] b3 The third element of b
-         *
-         * \return Real The dot product of a and b
-         */
-        inline __device__ __host__ Real _dotProduct(Real const &a1,
-                                                    Real const &a2,
-                                                    Real const &a3,
-                                                    Real const &b1,
-                                                    Real const &b2,
-                                                    Real const &b3)
-        {return a1*b1 + ((a2*b2) + (a3*b3));};
+/*!
+ * \brief Compute the L* or R* state. M&K 2005 equations 43-48
+ *
+ * \param state The non-star state on the same side as the desired star
+ * state \param speed The wavespeeds \param speedSide The wave speed on the
+ * same side as the desired star state \param magneticX The magnetic field
+ * in the x direction \param totalPressureStar The total pressure in the
+ * star state \return mhd::internal::StarState The computed star state
+ */
+__device__ __host__ mhd::internal::StarState computeStarState(mhd::internal::State const &state,
+                                                              mhd::internal::Speeds const &speed, Real const &speedSide,
+                                                              Real const &magneticX, Real const &totalPressureStar);
 
-        /*!
-         * \brief Compute the double star state
-         *
-         * \param[in] speedM
-         * \param[in] magneticX
-         * \param[in] totalPressureStar
-         * \param[in] densityStarL
-         * \param[in] velocityStarYL
-         * \param[in] velocityStarZL
-         * \param[in] energyStarL
-         * \param[in] magneticStarYL
-         * \param[in] magneticStarZL
-         * \param[in] densityStarR
-         * \param[in] velocityStarYR
-         * \param[in] velocityStarZR
-         * \param[in] energyStarR
-         * \param[in] magneticStarYR
-         * \param[in] magneticStarZR
-         * \param[out] velocityDoubleStarY
-         * \param[out] velocityDoubleStarZ
-         * \param[out] magneticDoubleStarY
-         * \param[out] magneticDoubleStarZ
-         * \param[out] energyDoubleStarL
-         * \param[out] energyDoubleStarR
-         */
-        __device__ __host__ void _doubleStarState(Real const &speedM,
-                                                  Real const &magneticX,
-                                                  Real const &totalPressureStar,
-                                                  Real const &densityStarL,
-                                                  Real const &velocityStarYL,
-                                                  Real const &velocityStarZL,
-                                                  Real const &energyStarL,
-                                                  Real const &magneticStarYL,
-                                                  Real const &magneticStarZL,
-                                                  Real const &densityStarR,
-                                                  Real const &velocityStarYR,
-                                                  Real const &velocityStarZR,
-                                                  Real const &energyStarR,
-                                                  Real const &magneticStarYR,
-                                                  Real const &magneticStarZR,
-                                                  Real &velocityDoubleStarY,
-                                                  Real &velocityDoubleStarZ,
-                                                  Real &magneticDoubleStarY,
-                                                  Real &magneticDoubleStarZ,
-                                                  Real &energyDoubleStarL,
-                                                  Real &energyDoubleStarR);
+/*!
+ * \brief Compute the flux in the star state. M&K 2005 equation 64
+ *
+ * \param starState The star state to compute the flux of
+ * \param state The non-star state on the same side as the star state
+ * \param flux The non-star flux on the same side as the star state
+ * \param speed The wave speeds
+ * \param speedSide The non-star wave speed on the same side as the star state
+ * \return mhd::internal::Flux The flux in the star state
+ */
+__device__ __host__ mhd::internal::Flux starFluxes(mhd::internal::StarState const &starState,
+                                                   mhd::internal::State const &state, mhd::internal::Flux const &flux,
+                                                   mhd::internal::Speeds const &speed, Real const &speedSide);
 
-        /*!
-         * \brief Compute the double star state fluxes
-         *
-         * \param[in] speedStarSide The star speed on the side being computed
-         * \param[in] momentumStarFluxX
-         * \param[in] momentumStarFluxY
-         * \param[in] momentumStarFluxZ
-         * \param[in] energyStarFlux
-         * \param[in] magneticStarFluxY
-         * \param[in] magneticStarFluxZ
-         * \param[in] densityStar
-         * \param[in] velocityStarX
-         * \param[in] velocityStarY
-         * \param[in] velocityStarZ
-         * \param[in] energyStar
-         * \param[in] magneticStarY
-         * \param[in] magneticStarZ
-         * \param[in] velocityDoubleStarX
-         * \param[in] velocityDoubleStarY
-         * \param[in] velocityDoubleStarZ
-         * \param[in] energyDoubleStar
-         * \param[in] magneticDoubleStarY
-         * \param[in] magneticDoubleStarZ
-         * \param[out] momentumDoubleStarFluxX
-         * \param[out] momentumDoubleStarFluxY
-         * \param[out] momentumDoubleStarFluxZ
-         * \param[out] energyDoubleStarFlux
-         * \param[out] magneticDoubleStarFluxY
-         * \param[out] magneticDoubleStarFluxZ
-         */
-        __device__ __host__ void _doubleStarFluxes(Real const &speedStarSide,
-                                                   Real const &momentumStarFluxX,
-                                                   Real const &momentumStarFluxY,
-                                                   Real const &momentumStarFluxZ,
-                                                   Real const &energyStarFlux,
-                                                   Real const &magneticStarFluxY,
-                                                   Real const &magneticStarFluxZ,
-                                                   Real const &densityStar,
-                                                   Real const &velocityStarX,
-                                                   Real const &velocityStarY,
-                                                   Real const &velocityStarZ,
-                                                   Real const &energyStar,
-                                                   Real const &magneticStarY,
-                                                   Real const &magneticStarZ,
-                                                   Real const &velocityDoubleStarX,
-                                                   Real const &velocityDoubleStarY,
-                                                   Real const &velocityDoubleStarZ,
-                                                   Real const &energyDoubleStar,
-                                                   Real const &magneticDoubleStarY,
-                                                   Real const &magneticDoubleStarZ,
-                                                   Real &momentumDoubleStarFluxX,
-                                                   Real &momentumDoubleStarFluxY,
-                                                   Real &momentumDoubleStarFluxZ,
-                                                   Real &energyDoubleStarFlux,
-                                                   Real &magneticDoubleStarFluxY,
-                                                   Real &magneticDoubleStarFluxZ);
+/*!
+ * \brief Compute the double star state. M&K 2005 equations 59-63
+ *
+ * \param starStateL The Left star state
+ * \param starStateR The Right star state
+ * \param magneticX The x magnetic field
+ * \param totalPressureStar The total pressure in the star state
+ * \param speed The approximate wave speeds
+ * \return mhd::internal::DoubleStarState The double star state
+ */
+__device__ __host__ mhd::internal::DoubleStarState computeDoubleStarState(mhd::internal::StarState const &starStateL,
+                                                                          mhd::internal::StarState const &starStateR,
+                                                                          Real const &magneticX,
+                                                                          Real const &totalPressureStar,
+                                                                          mhd::internal::Speeds const &speed);
 
-    } // _hlldInternal namespace
+/*!
+ * \brief Compute the double star state fluxes. M&K 2005 equation 65
+ *
+ * \param doubleStarState The double star states
+ * \param starState The star state on the same side
+ * \param state The non-star state on the same side
+ * \param flux The non-star flux on the same side
+ * \param speed The approximate wave speeds
+ * \param speedSide The wave speed on the same side
+ * \param speedSideStar The star wave speed on the same side
+ * \return __device__
+ */
+__device__ __host__ mhd::internal::Flux computeDoubleStarFluxes(
+    mhd::internal::DoubleStarState const &doubleStarState, Real const &doubleStarStateEnergy,
+    mhd::internal::StarState const &starState, mhd::internal::State const &state, mhd::internal::Flux const &flux,
+    mhd::internal::Speeds const &speed, Real const &speedSide, Real const &speedSideStar);
 
-#endif //CUDA
+/*!
+ * \brief Specialization of mhd::utils::computeGasPressure for use in the HLLD solver
+ *
+ * \param state The State to compute the gas pressure of
+ * \param magneticX The X magnetic field
+ * \param gamma The adiabatic index
+ * \return Real The gas pressure
+ */
+inline __host__ __device__ Real Calc_Pressure_Primitive(mhd::internal::State const &state, Real const &magneticX,
+                                                        Real const &gamma)
+{
+  return hydro_utilities::Calc_Pressure_Primitive(state.energy, state.density, state.velocityX, state.velocityY,
+                                                  state.velocityZ, gamma, magneticX, state.magneticY, state.magneticZ);
+}
+}  // namespace internal
+}  // end namespace mhd
diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu
index 754c2dba0..7fc96bf0c 100644
--- a/src/riemann_solvers/hlld_cuda_tests.cu
+++ b/src/riemann_solvers/hlld_cuda_tests.cu
@@ -1,2581 +1,2411 @@
 /*!
-* \file hlld_cuda_tests.cpp
-* \author Robert 'Bob' Caddy (rvc@pitt.edu)
-* \brief Test the code units within hlld_cuda.cu
-*
-*/
+ * \file hlld_cuda_tests.cpp
+ * \author Robert 'Bob' Caddy (rvc@pitt.edu)
+ * \brief Test the code units within hlld_cuda.cu
+ *
+ */
 
 // STL Includes
+#include <algorithm>
 #include <cmath>
+#include <numeric>
 #include <stdexcept>
-#include <algorithm>
-#include <valarray>
 
 // External Includes
-#include <gtest/gtest.h>    // Include GoogleTest and related libraries/headers
+#include <gtest/gtest.h>  // Include GoogleTest and related libraries/headers
 
 // Local Includes
 #include "../global/global_cuda.h"
+#include "../grid/grid_enum.h"
+#include "../riemann_solvers/hlld_cuda.h"  // Include code to test
 #include "../utils/gpu.hpp"
-#include "../utils/testing_utilities.h"
+#include "../utils/hydro_utilities.h"
 #include "../utils/mhd_utilities.h"
-#include "../riemann_solvers/hlld_cuda.h"   // Include code to test
-
-#if defined(CUDA) && defined(HLLD)
-    // =========================================================================
-    // Integration tests for the entire HLLD solver. Unit tests are below
-    // =========================================================================
-
-    // =========================================================================
-    /*!
-    * \brief Test fixture for simple testing of the HLLD Riemann Solver.
-    Effectively takes the left state, right state, fiducial fluxes, and
-    custom user output then performs all the required running and testing
-    *
-    */
-    class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test
-    {
-    protected:
-        // =====================================================================
-        /*!
-        * \brief Compute and return the HLLD fluxes
-        *
-        * \param[in] leftState The state on the left side in conserved
-        * variables. In order the elements are: density, x-momentum,
-        * y-momentum, z-momentum, energy, passive scalars, x-magnetic field,
-        * y-magnetic field, z-magnetic field.
-        * \param[in] rightState The state on the right side in conserved
-        * variables. In order the elements are: density, x-momentum,
-        * y-momentum, z-momentum, energy, passive scalars, x-magnetic field,
-        * y-magnetic field, z-magnetic field.
-        * \param[in] gamma The adiabatic index
-        * \param[in] direction Which plane the interface is. 0 = plane normal to
-        * X, 1 = plane normal to Y, 2 = plane normal to Z. Defaults to 0.
-        * \return std::vector<double>
-        */
-        std::vector<Real> computeFluxes(std::vector<Real> stateLeft,
-                                        std::vector<Real> stateRight,
-                                        Real const &gamma,
-                                        int const &direction=0)
-        {
-
-            // Rearrange X, Y, and Z values if a different direction is chosen
-            // besides default
-            stateLeft  = _cycleXYZ(stateLeft, direction);
-            stateRight = _cycleXYZ(stateRight, direction);
-
-            // Simulation Paramters
-            int const nx        = 1;  // Number of cells in the x-direction?
-            int const ny        = 1;  // Number of cells in the y-direction?
-            int const nz        = 1;  // Number of cells in the z-direction?
-            int const nGhost    = 0;  // Isn't actually used it appears
-            int nFields         = 8;  // Total number of conserved fields
-            #ifdef  SCALAR
-                nFields += NSCALARS;
-            #endif  // SCALAR
-            #ifdef  DE
-                nFields++;
-            #endif  //DE
-
-            // Launch Parameters
-            dim3 const dimGrid (1,1,1);  // How many blocks in the grid
-            dim3 const dimBlock(1,1,1);  // How many threads per block
-
-            // Create the std::vector to store the fluxes and declare the device
-            // pointers
-            std::vector<Real> testFlux(nFields);
-            Real *devConservedLeft;
-            Real *devConservedRight;
-            Real *devTestFlux;
-
-            // Allocate device arrays and copy data
-            CudaSafeCall(cudaMalloc(&devConservedLeft,  nFields*sizeof(Real)));
-            CudaSafeCall(cudaMalloc(&devConservedRight, nFields*sizeof(Real)));
-            CudaSafeCall(cudaMalloc(&devTestFlux,       nFields*sizeof(Real)));
-
-            CudaSafeCall(cudaMemcpy(devConservedLeft,
-                         stateLeft.data(),
-                         nFields*sizeof(Real),
-                         cudaMemcpyHostToDevice));
-            CudaSafeCall(cudaMemcpy(devConservedRight,
-                         stateRight.data(),
-                         nFields*sizeof(Real),
-                         cudaMemcpyHostToDevice));
-
-            // Run kernel
-            hipLaunchKernelGGL(Calculate_HLLD_Fluxes_CUDA,
-                               dimGrid,
-                               dimBlock,
-                               0,
-                               0,
-                               devConservedLeft,   // the "left" interface
-                               devConservedRight,  // the "right" interface
-                               devTestFlux,
-                               nx,
-                               ny,
-                               nz,
-                               nGhost,
-                               gamma,
-                               direction,
-                               nFields);
-
-            CudaCheckError();
-            CudaSafeCall(cudaMemcpy(testFlux.data(),
-                                    devTestFlux,
-                                    nFields*sizeof(Real),
-                                    cudaMemcpyDeviceToHost));
-
-            // Make sure to sync with the device so we have the results
-            cudaDeviceSynchronize();
-            CudaCheckError();
-
-            return testFlux;
-        }
-        // =====================================================================
-
-        // =====================================================================
-        /*!
-        * \brief Check if the fluxes are correct
-        *
-        * \param[in] fiducialFlux The fiducial flux in conserved variables. In
-        * order the elements are: density, x-momentum,
-        * y-momentum, z-momentum, energy, passive scalars, x-magnetic field,
-        * y-magnetic field, z-magnetic field.
-        * \param[in] scalarFlux The fiducial flux in the passive scalars
-        * \param[in] thermalEnergyFlux The fiducial flux in the dual energy
-        * thermal energy
-        * \param[in] testFlux The test flux in conserved variables. In order the
-        * elements are: density, x-momentum,
-        * y-momentum, z-momentum, energy, passive scalars, x-magnetic field,
-        * y-magnetic field, z-magnetic field.
-        * \param[in] customOutput Any custom output the user would like to
-        * print. It will print after the default GTest output but before the
-        * values that failed are printed
-        * \param[in] direction Which plane the interface is. 0 = plane normal to
-        * X, 1 = plane normal to Y, 2 = plane normal to Z. Defaults to 0.
-        */
-        void checkResults(std::vector<Real> fiducialFlux,
-                          std::vector<Real> scalarFlux,
-                          Real thermalEnergyFlux,
-                          std::vector<Real> const &testFlux,
-                          std::string const &customOutput = "",
-                          int const &direction=0)
-        {
-            // Field names
-            std::vector<std::string> fieldNames{"Densities",
-                                                "X Momentum",
-                                                "Y Momentum",
-                                                "Z Momentum",
-                                                "Energies",
-                                                "X Magnetic Field",
-                                                "Y Magnetic Field",
-                                                "Z Magnetic Field"};
-            #ifdef  DE
-                fieldNames.push_back("Thermal energy (dual energy)");
-                fiducialFlux.push_back(thermalEnergyFlux);
-            #endif  //DE
-            #ifdef  SCALAR
-                std::vector<std::string> scalarNames{"Scalar 1", "Scalar 2", "Scalar 3"};
-                fieldNames.insert(fieldNames.begin()+5,
-                                  scalarNames.begin(),
-                                  scalarNames.begin() + NSCALARS);
-
-                fiducialFlux.insert(fiducialFlux.begin()+5,
-                                    scalarFlux.begin(),
-                                    scalarFlux.begin() + NSCALARS);
-            #endif  //SCALAR
-
-            // Rearrange X, Y, and Z values if a different direction is chosen
-            // besides default
-            fiducialFlux = _cycleXYZ(fiducialFlux, direction);
-
-            ASSERT_TRUE(    (fiducialFlux.size() == testFlux.size())
-                        and (fiducialFlux.size() == fieldNames.size()))
-                << "The fiducial flux, test flux, and field name vectors are not all the same length" << std::endl
-                << "fiducialFlux.size() = " << fiducialFlux.size() << std::endl
-                << "testFlux.size() = "     << testFlux.size()     << std::endl
-                << "fieldNames.size() = "   << fieldNames.size()   << std::endl;
-
-            // Check for equality
-            for (size_t i = 0; i < fieldNames.size(); i++)
-            {
-                // Check for equality and if not equal return difference
-                double absoluteDiff;
-                int64_t ulpsDiff;
-
-                bool areEqual = testingUtilities::nearlyEqualDbl(fiducialFlux[i],
-                                                                 testFlux[i],
-                                                                 absoluteDiff,
-                                                                 ulpsDiff);
-                EXPECT_TRUE(areEqual)
-                    << std::endl << customOutput << std::endl
-                    << "There's a difference in "      << fieldNames[i]   << " Flux" << std::endl
-                    << "The direction is:       "      << direction       << " (0=X, 1=Y, 2=Z)" << std::endl
-                    << "The fiducial value is:       " << fiducialFlux[i] << std::endl
-                    << "The test value is:           " << testFlux[i]     << std::endl
-                    << "The absolute difference is:  " << absoluteDiff    << std::endl
-                    << "The ULP difference is:       " << ulpsDiff        << std::endl;
-            }
-        }
-        // =====================================================================
-
-        // =====================================================================
-        /*!
-         * \brief Convert a vector of quantities in primitive variables  to
-         * conserved variables
-         *
-         * \param[in] input The state in primitive variables. In order the
-         * elements are: density, x-momentum,
-         * y-momentum, z-momentum, energy, passive scalars, x-magnetic field,
-         * y-magnetic field, z-magnetic field.
-         * \return std::vector<Real> The state in conserved variables. In order
-         * the elements are: density, x-momentum,
-         * y-momentum, z-momentum, energy, passive scalars, x-magnetic field,
-         * y-magnetic field, z-magnetic field.
-         */
-        std::vector<Real> primitive2Conserved(std::vector<Real> const &input,
-                                              double const &gamma,
-                                              std::vector<Real> const &primitiveScalars)
-        {
-            std::vector<Real> output(input.size());
-            output.at(0) = input.at(0);  // Density
-            output.at(1) = input.at(1) * input.at(0);  // X Velocity to momentum
-            output.at(2) = input.at(2) * input.at(0);  // Y Velocity to momentum
-            output.at(3) = input.at(3) * input.at(0);  // Z Velocity to momentum
-            output.at(4) = mhdUtils::computeEnergy(input.at(4),
-                                                   input.at(0),
-                                                   input.at(1),
-                                                   input.at(2),
-                                                   input.at(3),
-                                                   input.at(5),
-                                                   input.at(6),
-                                                   input.at(7),
-                                                   gamma);  // Pressure to Energy
-            output.at(5) = input.at(5);  // X Magnetic Field
-            output.at(6) = input.at(6);  // Y Magnetic Field
-            output.at(7) = input.at(7);  // Z Magnetic Field
-
-            #ifdef SCALAR
-                std::vector<Real> conservedScalar(primitiveScalars.size());
-                std::transform(primitiveScalars.begin(),
-                               primitiveScalars.end(),
-                               conservedScalar.begin(),
-                               [&](Real const &c){ return c*output.at(0); });
-                output.insert(output.begin()+5,
-                              conservedScalar.begin(),
-                              conservedScalar.begin() + NSCALARS);
-            #endif //SCALAR
-            #ifdef  DE
-                output.push_back(mhdUtils::computeThermalEnergy(output.at(4),
-                                                                output.at(0),
-                                                                output.at(1),
-                                                                output.at(2),
-                                                                output.at(3),
-                                                                output.at(5 + NSCALARS),
-                                                                output.at(6 + NSCALARS),
-                                                                output.at(7 + NSCALARS),
-                                                                gamma));
-            #endif  //DE
-            return output;
-        }
-        // =====================================================================
-
-        // =====================================================================
-        /*!
-         * \brief On test start make sure that the number of NSCALARS is allowed
-         *
-         */
-        void SetUp()
-        {
-            #ifdef  SCALAR
-                ASSERT_LE(NSCALARS, 3) << "Only up to 3 passive scalars are currently supported in HLLD tests. NSCALARS = " << NSCALARS;
-                ASSERT_GE(NSCALARS, 1) << "There must be at least 1 passive scalar to test with passive scalars. NSCALARS = " << NSCALARS;
-            #endif  //SCALAR
-        }
-        // =====================================================================
-    private:
-        // =====================================================================
-        /*!
-         * \brief Cyclically permute the vector quantities in the list of
-         * conserved variables so that the same interfaces and fluxes can be
-         * used to test the HLLD solver in all 3 directions.
-         *
-         * \param[in,out] conservedVec The std::vector of conserved variables to
-         * be cyclically permutated
-         * \param[in] direction Which plane the interface is. 0 = plane normal
-         * to X, 1 = plane normal to Y, 2 = plane normal to Z
-         *
-         * \return std::vector<Real> The cyclically permutated list of conserved
-         * variables
-         */
-        std::vector<Real> inline _cycleXYZ(std::vector<Real> conservedVec,
-                                           int const &direction)
-        {
-            switch (direction)
-            {
-            case 0:  // Plane normal to X. Default case, do nothing
-                ;
-                break;
-            case 1:  // Plane normal to Y
-            case 2:  // Plane normal to Z
-                // Fall through for both Y and Z normal planes
-                {
-                    size_t shift = 3 - direction;
-                    auto momentumBegin = conservedVec.begin()+1;
-                    auto magneticBegin = conservedVec.begin()+5;
-                    #ifdef  SCALAR
-                        magneticBegin += NSCALARS;
-                    #endif  //SCALAR
-
-                    std::rotate(momentumBegin, momentumBegin+shift, momentumBegin+3);
-                    std::rotate(magneticBegin, magneticBegin+shift, magneticBegin+3);
-                }
-                break;
-            default:
-                throw std::invalid_argument(("Invalid Value of `direction`"
-                    " passed to `_cycleXYZ`. Value passed was "
-                     + std::to_string(direction) + ", should be 0, 1, or 2."));
-                break;
-            }
-            return conservedVec;
-        }
-        // =====================================================================
-    };
-    // =========================================================================
-
-    // =========================================================================
-    /*!
-    * \brief Test the HLLD Riemann Solver using various states and waves from
-    * the Brio & Wu Shock tube
-    *
-    */
-    TEST_F(tMHDCalculateHLLDFluxesCUDA,
-           BrioAndWuShockTubeCorrectInputExpectCorrectOutput)
-    {
-        // Constant Values
-        Real const gamma = 2.;
-        Real const Vz    = 0.0;
-        Real const Bx    = 0.75;
-        Real const Bz    = 0.0;
-        std::vector<Real> const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875};
-
-        // States
-        std::vector<Real> const                       // | Density | X-Velocity | Y-Velocity | Z-Velocity | Pressure | X-Magnetic Field | Y-Magnetic Field | Z-Magnetic Field | Adiabatic Index | Passive Scalars |
-            leftICs                = primitive2Conserved({1.0,       0.0,         0.0,        Vz,           1.0,       Bx,                1.0     ,          Bz},               gamma,            primitiveScalar),
-            leftFastRareLeftSide   = primitive2Conserved({0.978576,  0.038603,   -0.011074,   Vz,           0.957621,  Bx,                0.970288,          Bz},               gamma,            primitiveScalar),
-            leftFastRareRightSide  = primitive2Conserved({0.671655,  0.647082,   -0.238291,   Vz,           0.451115,  Bx,                0.578240,          Bz},               gamma,            primitiveScalar),
-            compoundLeftSide       = primitive2Conserved({0.814306,  0.506792,   -0.911794,   Vz,           0.706578,  Bx,               -0.108819,          Bz},               gamma,            primitiveScalar),
-            compoundPeak           = primitive2Conserved({0.765841,  0.523701,   -1.383720,   Vz,           0.624742,  Bx,               -0.400787,          Bz},               gamma,            primitiveScalar),
-            compoundRightSide      = primitive2Conserved({0.695211,  0.601089,   -1.583720,   Vz,           0.515237,  Bx,               -0.537027,          Bz},               gamma,            primitiveScalar),
-            contactLeftSide        = primitive2Conserved({0.680453,  0.598922,   -1.584490,   Vz,           0.515856,  Bx,               -0.533616,          Bz},               gamma,            primitiveScalar),
-            contactRightSide       = primitive2Conserved({0.231160,  0.599261,   -1.584820,   Vz,           0.516212,  Bx,               -0.533327,          Bz},               gamma,            primitiveScalar),
-            slowShockLeftSide      = primitive2Conserved({0.153125,  0.086170,   -0.683303,   Vz,           0.191168,  Bx,               -0.850815,          Bz},               gamma,            primitiveScalar),
-            slowShockRightSide     = primitive2Conserved({0.117046, -0.238196,   -0.165561,   Vz,           0.087684,  Bx,               -0.903407,          Bz},               gamma,            primitiveScalar),
-            rightFastRareLeftSide  = primitive2Conserved({0.117358, -0.228756,   -0.158845,   Vz,           0.088148,  Bx,               -0.908335,          Bz},               gamma,            primitiveScalar),
-            rightFastRareRightSide = primitive2Conserved({0.124894, -0.003132,   -0.002074,   Vz,           0.099830,  Bx,               -0.999018,          Bz},               gamma,            primitiveScalar),
-            rightICs               = primitive2Conserved({0.128,     0.0,         0.0,        Vz,           0.1,       Bx,               -1.0,               Bz},               gamma,            primitiveScalar);
-
-        for (size_t direction = 0; direction < 3; direction++)
-        {
-            // Initial Condition Checks
-            {
-                std::string const outputString {"Left State:  Left Brio & Wu state\n"
-                                                "Right State: Left Brio & Wu state\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0, 1.21875, -0.75, 0, 0, 0.0, 0, 0};
-                std::vector<Real> const scalarFlux{0, 0, 0};
-                Real thermalEnergyFlux = 0.0;
-                std::vector<Real> const testFluxes = computeFluxes(leftICs,
-                                                                   leftICs,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right Brio & Wu state\n"
-                                                "Right State: Right Brio & Wu state\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0, 0.31874999999999998, 0.75, 0, 0, 0.0, 0, 0};
-                std::vector<Real> const scalarFlux{0, 0, 0};
-                Real thermalEnergyFlux = 0.0;
-                std::vector<Real> const testFluxes = computeFluxes(rightICs,
-                                                                   rightICs,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Left Brio & Wu state\n"
-                                                "Right State: Right Brio & Wu state\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.20673357746080057, 0.4661897584603672, 0.061170028480309613, 0, 0.064707291981509041, 0.0, 1.0074980455427278, 0};
-                std::vector<Real> const scalarFlux{0.22885355953447648, 0.46073027567244362, 0.6854281091039145};
-                Real thermalEnergyFlux = 0.20673357746080046;
-                std::vector<Real> const testFluxes = computeFluxes(leftICs,
-                                                                   rightICs,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right Brio & Wu state\n"
-                                                "Right State: Left Brio & Wu state\n"
-                                                "HLLD State: Right Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{-0.20673357746080057, 0.4661897584603672, 0.061170028480309613, 0, -0.064707291981509041, 0.0, -1.0074980455427278, 0};
-                std::vector<Real> const scalarFlux{-0.22885355953447648, -0.46073027567244362, -0.6854281091039145};
-                Real thermalEnergyFlux = -0.20673357746080046;
-                std::vector<Real> const testFluxes = computeFluxes(rightICs,
-                                                                   leftICs,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-
-            // Cross wave checks
-            {
-                std::string const outputString {"Left State:  Left of left fast rarefaction\n"
-                                                "Right State: Right of left fast rarefaction\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.4253304970883941, 0.47729308161522394, -0.55321646324583107, 0, 0.92496835095531071, 0.0, 0.53128887284876058, 0};
-                std::vector<Real> const scalarFlux{0.47083980954039228, 0.94789941519098619, 1.4101892974729979};
-                Real thermalEnergyFlux = 0.41622256825457099;
-                std::vector<Real> const testFluxes = computeFluxes(leftFastRareLeftSide,
-                                                                   leftFastRareRightSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right of left fast rarefaction\n"
-                                                "Right State: Left of left fast rarefaction\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.070492123816403796, 1.2489600267034342, -0.71031457071286608, 0, 0.21008080091470105, 0.0, 0.058615131833681167, 0};
-                std::vector<Real> const scalarFlux{0.078034606921016325, 0.15710005136841393, 0.23371763662029341};
-                Real thermalEnergyFlux = 0.047345816580591255;
-                std::vector<Real> const testFluxes = computeFluxes(leftFastRareRightSide,
-                                                                   leftFastRareLeftSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Left of compound wave\n"
-                                                "Right State: Right of compound wave\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.4470171023231666, 0.60747660800918468, -0.20506357956052623, 0, 0.72655525704800772, 0.0, 0.76278089951123285, 0};
-                std::vector<Real> const scalarFlux{0.4948468279606959, 0.99623058485843297, 1.482091544807598};
-                Real thermalEnergyFlux = 0.38787931087981475;
-                std::vector<Real> const testFluxes = computeFluxes(compoundLeftSide,
-                                                                   compoundRightSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right of compound wave\n"
-                                                "Right State: Left of compound wave\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.38496850292724116, 0.66092864409611585, -0.3473204105316457, 0, 0.89888639514227009, 0.0, 0.71658566275120927, 0};
-                std::vector<Real> const scalarFlux{0.42615918171426637, 0.85794792823389721, 1.2763685331959034};
-                Real thermalEnergyFlux = 0.28530908823756074;
-                std::vector<Real> const testFluxes = computeFluxes(compoundRightSide,
-                                                                   compoundLeftSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Left of Compound Wave\n"
-                                                "Right State: Peak of Compound Wave\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.41864266180405574, 0.63505764056357727, -0.1991008813536404, 0, 0.73707474818824525, 0.0, 0.74058225030218761, 0};
-                std::vector<Real> const scalarFlux{0.46343639240225803, 0.93299478173931882, 1.388015684704111};
-                Real thermalEnergyFlux = 0.36325864563467081;
-                std::vector<Real> const testFluxes = computeFluxes(compoundLeftSide,
-                                                                   compoundPeak,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Peak of Compound Wave\n"
-                                                "Right State: Left of Compound Wave\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.39520761138156862, 0.6390998385557225, -0.35132701297727598, 0, 0.89945171879176522, 0.0, 0.71026545717401468, 0};
-                std::vector<Real> const scalarFlux{0.43749384947851333, 0.88076699477714815, 1.3103164425435772};
-                Real thermalEnergyFlux = 0.32239432669410983;
-                std::vector<Real> const testFluxes = computeFluxes(compoundPeak,
-                                                                   compoundLeftSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Peak of Compound Wave\n"
-                                                "Right State: Right of Compound Wave\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.4285899590904928, 0.6079309920345296, -0.26055320217638239, 0, 0.75090757444649436, 0.0, 0.85591904930227747, 0};
-                std::vector<Real> const scalarFlux{0.47444802592454061, 0.95516351251477749, 1.4209960899845735};
-                Real thermalEnergyFlux = 0.34962629086469987;
-                std::vector<Real> const testFluxes = computeFluxes(compoundPeak,
-                                                                   compoundRightSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right of Compound Wave\n"
-                                                "Right State: Peak of Compound Wave\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.39102247793946454, 0.65467021266207581, -0.25227691377588229, 0, 0.76271525822813691, 0.0, 0.83594460438033491, 0};
-                std::vector<Real> const scalarFlux{0.43286091709705776, 0.8714399289555731, 1.2964405732397004};
-                Real thermalEnergyFlux = 0.28979582956267347;
-                std::vector<Real> const testFluxes = computeFluxes(compoundRightSide,
-                                                                   compoundPeak,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Left of contact discontinuity\n"
-                                                "Right State: Right of contact discontinuity\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.40753761783585118, 0.62106392255463172, -0.2455554035355339, 0, 0.73906344777217226, 0.0, 0.8687394222350926, 0};
-                std::vector<Real> const scalarFlux{0.45114313616335622, 0.90824587528847567, 1.3511967538747176};
-                Real thermalEnergyFlux = 0.30895701155896288;
-                std::vector<Real> const testFluxes = computeFluxes(contactLeftSide,
-                                                                   contactRightSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right of contact discontinuity\n"
-                                                "Right State: Left of contact discontinuity\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.13849588572126192, 0.46025037934770729, 0.18052412687974539, 0, 0.35385590617992224, 0.0, 0.86909622543144227, 0};
-                std::vector<Real> const scalarFlux{0.15331460335320088, 0.30865449334158279, 0.45918507401922254};
-                Real thermalEnergyFlux = 0.30928031735570188;
-                std::vector<Real> const testFluxes = computeFluxes(contactRightSide,
-                                                                   contactLeftSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Slow shock left side\n"
-                                                "Right State: Slow shock right side\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{3.5274134848883865e-05, 0.32304849716274459, 0.60579784881286636, 0, -0.32813070621836449, 0.0, 0.40636483121437972, 0};
-                std::vector<Real> const scalarFlux{3.9048380136491711e-05, 7.8612589559210735e-05, 0.00011695189454326261};
-                Real thermalEnergyFlux = 4.4037784886918126e-05;
-                std::vector<Real> const testFluxes = computeFluxes(slowShockLeftSide,
-                                                                   slowShockRightSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Slow shock right side\n"
-                                                "Right State: Slow shock left side\n"
-                                                "HLLD State: Right Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{-0.016514307834939734, 0.16452009375678914, 0.71622171077118635, 0, -0.37262428139914472, 0.0, 0.37204015363322052, 0};
-                std::vector<Real> const scalarFlux{-0.018281297976332211, -0.036804091985367396, -0.054753421923485097};
-                Real thermalEnergyFlux = -0.020617189878790236;
-                std::vector<Real> const testFluxes = computeFluxes(slowShockRightSide,
-                                                                   slowShockLeftSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right fast rarefaction left side\n"
-                                                "Right State: Right fast rarefaction right side\n"
-                                                "HLLD State: Right Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{-0.026222824218991747, 0.22254903570732654, 0.68544334213642255, 0, -0.33339172106895454, 0.0, 0.32319665359522443, 0};
-                std::vector<Real> const scalarFlux{-0.029028601629558917, -0.058440671223894146, -0.086942145734385745};
-                Real thermalEnergyFlux = -0.020960370728633469;
-                std::vector<Real> const testFluxes = computeFluxes(rightFastRareLeftSide,
-                                                                   rightFastRareRightSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right fast rarefaction right side\n"
-                                                "Right State: Right fast rarefaction left side\n"
-                                                "HLLD State: Right Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{-0.001088867226159973, 0.32035322820305906, 0.74922357263343131, 0, -0.0099746892805345766, 0.0, 0.0082135595470345102, 0};
-                std::vector<Real> const scalarFlux{-0.0012053733294214947, -0.0024266696462237609, -0.0036101547366371614};
-                Real thermalEnergyFlux = -0.00081785194236053073;
-                std::vector<Real> const testFluxes = computeFluxes(rightFastRareRightSide,
-                                                                   rightFastRareLeftSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-        }
-    }
-    // =========================================================================
-
-    // =========================================================================
-    /*!
-    * \brief Test the HLLD Riemann Solver using various states and waves from
-    * the Dai & Woodward Shock tube
-    *
-    */
-    TEST_F(tMHDCalculateHLLDFluxesCUDA,
-           DaiAndWoodwardShockTubeCorrectInputExpectCorrectOutput)
-    {
-        // Constant Values
-        Real const gamma = 5./3.;
-        Real const coef = 1. / (std::sqrt(4. * M_PI));
-        Real const Bx = 4. * coef;
-        std::vector<Real> const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875};
-
-        // States
-        std::vector<Real> const                       // | Density | X-Velocity | Y-Velocity | Z-Velocity | Pressure | X-Magnetic Field | Y-Magnetic Field | Z-Magnetic Field | Adiabatic Index | Passive Scalars |
-            leftICs                 = primitive2Conserved({1.08,     0.0,         0.0,         0.0,         1.0,       Bx,                3.6*coef,          2*coef},           gamma,            primitiveScalar),
-            leftFastShockLeftSide   = primitive2Conserved({1.09406,  1.176560,    0.021003,    0.506113,    0.970815,  1.12838,           1.105355,          0.614087},         gamma,            primitiveScalar),
-            leftFastShockRightSide  = primitive2Conserved({1.40577,  0.693255,    0.210562,    0.611423,    1.494290,  1.12838,           1.457700,          0.809831},         gamma,            primitiveScalar),
-            leftRotationLeftSide    = primitive2Conserved({1.40086,  0.687774,    0.215124,    0.609161,    1.485660,  1.12838,           1.458735,          0.789960},         gamma,            primitiveScalar),
-            leftRotationRightSide   = primitive2Conserved({1.40119,  0.687504,    0.330268,    0.334140,    1.486570,  1.12838,           1.588975,          0.475782},         gamma,            primitiveScalar),
-            leftSlowShockLeftSide   = primitive2Conserved({1.40519,  0.685492,    0.326265,    0.333664,    1.493710,  1.12838,           1.575785,          0.472390},         gamma,            primitiveScalar),
-            leftSlowShockRightSide  = primitive2Conserved({1.66488,  0.578545,    0.050746,    0.250260,    1.984720,  1.12838,           1.344490,          0.402407},         gamma,            primitiveScalar),
-            contactLeftSide         = primitive2Conserved({1.65220,  0.578296,    0.049683,    0.249962,    1.981250,  1.12838,           1.346155,          0.402868},         gamma,            primitiveScalar),
-            contactRightSide        = primitive2Conserved({1.49279,  0.578276,    0.049650,    0.249924,    1.981160,  1.12838,           1.346180,          0.402897},         gamma,            primitiveScalar),
-            rightSlowShockLeftSide  = primitive2Conserved({1.48581,  0.573195,    0.035338,    0.245592,    1.956320,  1.12838,           1.370395,          0.410220},         gamma,            primitiveScalar),
-            rightSlowShockRightSide = primitive2Conserved({1.23813,  0.450361,   -0.275532,    0.151746,    1.439000,  1.12838,           1.609775,          0.482762},         gamma,            primitiveScalar),
-            rightRotationLeftSide   = primitive2Conserved({1.23762,  0.450102,   -0.274410,    0.145585,    1.437950,  1.12838,           1.606945,          0.493879},         gamma,            primitiveScalar),
-            rightRotationRightSide  = primitive2Conserved({1.23747,  0.449993,   -0.180766,   -0.090238,    1.437350,  1.12838,           1.503855,          0.752090},         gamma,            primitiveScalar),
-            rightFastShockLeftSide  = primitive2Conserved({1.22305,  0.424403,   -0.171402,   -0.085701,    1.409660,  1.12838,           1.447730,          0.723864},         gamma,            primitiveScalar),
-            rightFastShockRightSide = primitive2Conserved({1.00006,  0.000121,   -0.000057,   -0.000028,    1.000100,  1.12838,           1.128435,          0.564217},         gamma,            primitiveScalar),
-            rightICs                = primitive2Conserved({1.0,      0.0,         0.0,         1.0,         0.2,       Bx,                4*coef,            2*coef},           gamma,            primitiveScalar);
-
-        for (size_t direction = 0; direction < 3; direction++)
-        {
-            // Initial Condition Checks
-            {
-                std::string const outputString {"Left State:  Left Dai & Woodward state\n"
-                                                "Right State: Left Dai & Woodward state\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0, 1.0381971863420549, -1.1459155902616465, -0.63661977236758127, 0, 0.0, 0, -1.1102230246251565e-16};
-                std::vector<Real> const scalarFlux{0,0,0};
-                Real thermalEnergyFlux = 0.0;
-                std::vector<Real> const testFluxes = computeFluxes(leftICs,
-                                                                   leftICs,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right Dai & Woodward state\n"
-                                                "Right State: Right Dai & Woodward state\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0, 0.35915494309189522, -1.2732395447351625, -0.63661977236758127, -0.63661977236758172, 0.0, 2.2204460492503131e-16, -1.1283791670955123};
-                std::vector<Real> const scalarFlux{0,0,0};
-                Real thermalEnergyFlux = 0.0;
-                std::vector<Real> const testFluxes = computeFluxes(rightICs,
-                                                                   rightICs,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Left Dai & Woodward state\n"
-                                                "Right State: Right Dai & Woodward state\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.17354924587196074, 0.71614983677687327, -1.1940929411768009, -1.1194725181819352, -0.11432087006939984, 0.0, 0.056156000248263505, -0.42800560867873094};
-                std::vector<Real> const scalarFlux{0.19211858644420357, 0.38677506032368902, 0.57540498691841158};
-                Real thermalEnergyFlux = 0.24104061926661174;
-                std::vector<Real> const testFluxes = computeFluxes(leftICs,
-                                                                   rightICs,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right Dai & Woodward state\n"
-                                                "Right State: Left Dai & Woodward state\n"
-                                                "HLLD State: Right Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{-0.17354924587196074, 0.71614983677687327, -1.1940929411768009, -0.14549552299758384, -0.47242308031148195, 0.0, -0.056156000248263505, -0.55262526758377528};
-                std::vector<Real> const scalarFlux{-0.19211858644420357, -0.38677506032368902, -0.57540498691841158};
-                Real thermalEnergyFlux = -0.24104061926661174;
-                std::vector<Real> const testFluxes = computeFluxes(rightICs,
-                                                                   leftICs,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-
-            // Cross wave checks
-            {
-                std::string const outputString {"Left State:  Left of left fast shock\n"
-                                                "Right State: Right of left fast shock\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.96813688187727132, 3.0871217875403394, -1.4687093290523414, -0.33726008721080036, 4.2986213406773457, 0.0, 0.84684181393860269, -0.087452560407274671};
-                std::vector<Real> const scalarFlux{1.0717251365527865, 2.157607767226648, 3.2098715673061045};
-                Real thermalEnergyFlux = 1.2886155333980993;
-                std::vector<Real> const testFluxes = computeFluxes(leftFastShockLeftSide,
-                                                                   leftFastShockRightSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right of left fast shock\n"
-                                                "Right State: Left of left fast shock\n"
-                                                "HLLD State: Left Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{1.3053938862274184, 2.4685129176021858, -1.181892850065283, -0.011160487372167127, 5.1797404608257249, 0.0, 1.1889903073770265, 0.10262704114294516};
-                std::vector<Real> const scalarFlux{1.4450678072086958, 2.9092249669830292, 4.3280519500627666};
-                Real thermalEnergyFlux = 2.081389946702628;
-                std::vector<Real> const testFluxes = computeFluxes(leftFastShockRightSide,
-                                                                   leftFastShockLeftSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Left of left rotation/Alfven wave\n"
-                                                "Right State: Right of left rotation/Alfven wave\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.96326128304298586, 2.8879592118317445, -1.4808188010794987, -0.20403672861184916, 4.014027751838869, 0.0, 0.7248753989305099, -0.059178137562467162};
-                std::vector<Real> const scalarFlux{1.0663278606879119, 2.1467419174572049, 3.1937064501984724};
-                Real thermalEnergyFlux = 1.5323573637968553;
-                std::vector<Real> const testFluxes = computeFluxes(leftRotationLeftSide,
-                                                                   leftRotationRightSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right of left rotation/Alfven wave\n"
-                                                "Right State: Left of left rotation/Alfven wave\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.96353754504060063, 2.8875487093397085, -1.4327309336053695, -0.31541343522923493, 3.9739842521208342, 0.0, 0.75541746728406312, -0.13479771672887678};
-                std::vector<Real> const scalarFlux{1.0666336820367937, 2.1473576000564334, 3.1946224007710313};
-                Real thermalEnergyFlux = 1.5333744977458499;
-                std::vector<Real> const testFluxes = computeFluxes(leftRotationRightSide,
-                                                                   leftRotationLeftSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Left of left slow shock\n"
-                                                "Right State: Right of left slow shock\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.88716095730727451, 2.9828594399125663, -1.417062582518549, -0.21524331343191233, 3.863474778369334, 0.0, 0.71242370728996041, -0.05229712416644372};
-                std::vector<Real> const scalarFlux{0.98208498809672407, 1.9771433235295921, 2.9413947405483505};
-                Real thermalEnergyFlux = 1.4145715457049737;
-                std::vector<Real> const testFluxes = computeFluxes(leftSlowShockLeftSide,
-                                                                   leftSlowShockRightSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right of left slow shock\n"
-                                                "Right State: Left of left slow shock\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{1.042385440439527, 2.7732383399777376, -1.5199872074603551, -0.21019362664841068, 4.1322001036232585, 0.0, 0.72170937317481543, -0.049474715634396704};
-                std::vector<Real> const scalarFlux{1.1539181074575644, 2.323079478570472, 3.4560437166206879};
-                Real thermalEnergyFlux = 1.8639570701934713;
-                std::vector<Real> const testFluxes = computeFluxes(leftSlowShockRightSide,
-                                                                   leftSlowShockLeftSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Left of contact discontinuity\n"
-                                                "Right State: Right of contact discontinuity\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.95545795601418737, 2.8843900822429749, -1.4715039715239722, -0.21575736014726318, 4.0078718055059257, 0.0, 0.72241353110189066, -0.049073560388753337};
-                std::vector<Real> const scalarFlux{1.0576895969443709, 2.1293512784652289, 3.1678344087247892};
-                Real thermalEnergyFlux = 1.7186185770667382;
-                std::vector<Real> const testFluxes = computeFluxes(contactLeftSide,
-                                                                   contactRightSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right of contact discontinuity\n"
-                                                "Right State: Left of contact discontinuity\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.86324813554422819, 2.8309913324581251, -1.4761428591480787, -0.23887765947428419, 3.9892942559102793, 0.0, 0.72244123046603836, -0.049025527032060034};
-                std::vector<Real> const scalarFlux{0.95561355347926669, 1.9238507665182214, 2.8621114407298114};
-                Real thermalEnergyFlux = 1.7184928987481187;
-                std::vector<Real> const testFluxes = computeFluxes(contactRightSide,
-                                                                   contactLeftSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Left of right slow shock\n"
-                                                "Right State: Right of right slow shock\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.81125524370350677, 2.901639500435365, -1.5141545346789429, -0.262600896007809, 3.8479660419540087, 0.0, 0.7218977970017596, -0.049091614519593846};
-                std::vector<Real> const scalarFlux{0.89805755065482806, 1.8079784457999033, 2.6897282701827465};
-                Real thermalEnergyFlux = 1.6022319728249694;
-                std::vector<Real> const testFluxes = computeFluxes(rightSlowShockLeftSide,
-                                                                   rightSlowShockRightSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right of right slow shock\n"
-                                                "Right State: Left of right slow shock\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.60157947557836688, 2.3888357198399746, -1.9910500022202977, -0.45610948442354332, 3.5359430988850069, 0.0, 1.0670963294022622, 0.05554893654378229};
-                std::vector<Real> const scalarFlux{0.66594699332331575, 1.3406911495770899, 1.994545286188885};
-                Real thermalEnergyFlux = 1.0487665253534804;
-                std::vector<Real> const testFluxes = computeFluxes(rightSlowShockRightSide,
-                                                                   rightSlowShockLeftSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Left of right rotation/Alfven wave\n"
-                                                "Right State: Right of right rotation/Alfven wave\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.55701691287884714, 2.4652223621237814, -1.9664615862227277, -0.47490477894092042, 3.3900659850690529, 0.0, 1.0325648885587542, 0.059165409025635551};
-                std::vector<Real> const scalarFlux{0.61661634650230224, 1.2413781978573175, 1.8467974773272691};
-                Real thermalEnergyFlux = 0.9707694646266285;
-                std::vector<Real> const testFluxes = computeFluxes(rightRotationLeftSide,
-                                                                   rightRotationRightSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right of right rotation/Alfven wave\n"
-                                                "Right State: Left of right rotation/Alfven wave\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.55689116371132596, 2.4648517303940851, -1.7972202655166787, -0.90018282739798461, 3.3401033852664566, 0.0, 0.88105841856465605, 0.43911718823267476};
-                std::vector<Real> const scalarFlux{0.61647714248450702, 1.2410979509359938, 1.8463805541782863};
-                Real thermalEnergyFlux = 0.9702629326292449;
-                std::vector<Real> const testFluxes = computeFluxes(rightRotationRightSide,
-                                                                   rightRotationLeftSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Left of right fast shock\n"
-                                                "Right State: Right of right fast shock\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.48777637414577313, 2.3709438477809708, -1.7282900552525988, -0.86414423547773778, 2.8885015704245069, 0.0, 0.77133731061645838, 0.38566794697432505};
-                std::vector<Real> const scalarFlux{0.53996724117661621, 1.0870674521621893, 1.6172294888076189};
-                Real thermalEnergyFlux = 0.84330016382608752;
-                std::vector<Real> const testFluxes = computeFluxes(rightFastShockLeftSide,
-                                                                   rightFastShockRightSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right of right fast shock\n"
-                                                "Right State: Left of right fast shock\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.040639426423817904, 1.0717156491947966, -1.2612066401572222, -0.63060225433149875, 0.15803727234007203, 0.0, 0.042555541396817498, 0.021277678888288909};
-                std::vector<Real> const scalarFlux{0.044987744655527385, 0.090569777630660403, 0.13474059488003065};
-                Real thermalEnergyFlux = 0.060961577855018087;
-                std::vector<Real> const testFluxes = computeFluxes(rightFastShockRightSide,
-                                                                   rightFastShockLeftSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-        }
-    }
-    // =========================================================================
-
-    // =========================================================================
-    /*!
-    * \brief Test the HLLD Riemann Solver using various states and waves from
-    * the Ryu & Jones 4d Shock tube
-    *
-    */
-    TEST_F(tMHDCalculateHLLDFluxesCUDA,
-           RyuAndJones4dShockTubeCorrectInputExpectCorrectOutput)
-    {
-        // Constant Values
-        Real const gamma = 5./3.;
-        Real const Bx = 0.7;
-        std::vector<Real> const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875};
-
-        // States
-        std::vector<Real> const                           // | Density | X-Velocity | Y-Velocity |  Z-Velocity |  Pressure | X-Magnetic Field | Y-Magnetic Field | Z-Magnetic Field | Adiabatic Index | Passive Scalars |
-            leftICs                    = primitive2Conserved({1.0,       0.0,         0.0,          0.0,          1.0,       Bx,                0.0,               0.0},              gamma,            primitiveScalar),
-            hydroRareLeftSide          = primitive2Conserved({0.990414,  0.012415,    1.458910e-58, 6.294360e-59, 0.984076,  Bx,                1.252355e-57,      5.366795e-58},     gamma,            primitiveScalar),
-            hydroRareRightSide         = primitive2Conserved({0.939477,  0.079800,    1.557120e-41, 7.505190e-42, 0.901182,  Bx,                1.823624e-40,      8.712177e-41},     gamma,            primitiveScalar),
-            switchOnSlowShockLeftSide  = primitive2Conserved({0.939863,  0.079142,    1.415730e-02, 7.134030e-03, 0.901820,  Bx,                2.519650e-02,      1.290082e-02},     gamma,            primitiveScalar),
-            switchOnSlowShockRightSide = primitive2Conserved({0.651753,  0.322362,    8.070540e-01, 4.425110e-01, 0.490103,  Bx,                6.598380e-01,      3.618000e-01},     gamma,            primitiveScalar),
-            contactLeftSide            = primitive2Conserved({0.648553,  0.322525,    8.072970e-01, 4.426950e-01, 0.489951,  Bx,                6.599295e-01,      3.618910e-01},     gamma,            primitiveScalar),
-            contactRightSide           = primitive2Conserved({0.489933,  0.322518,    8.073090e-01, 4.426960e-01, 0.489980,  Bx,                6.599195e-01,      3.618850e-01},     gamma,            primitiveScalar),
-            slowShockLeftSide          = primitive2Conserved({0.496478,  0.308418,    8.060830e-01, 4.420150e-01, 0.489823,  Bx,                6.686695e-01,      3.666915e-01},     gamma,            primitiveScalar),
-            slowShockRightSide         = primitive2Conserved({0.298260, -0.016740,    2.372870e-01, 1.287780e-01, 0.198864,  Bx,                8.662095e-01,      4.757390e-01},     gamma,            primitiveScalar),
-            rotationLeftSide           = primitive2Conserved({0.298001, -0.017358,    2.364790e-01, 1.278540e-01, 0.198448,  Bx,                8.669425e-01,      4.750845e-01},     gamma,            primitiveScalar),
-            rotationRightSide          = primitive2Conserved({0.297673, -0.018657,    1.059540e-02, 9.996860e-01, 0.197421,  Bx,                9.891580e-01,      1.024949e-04},     gamma,            primitiveScalar),
-            fastRareLeftSide           = primitive2Conserved({0.297504, -0.020018,    1.137420e-02, 1.000000e+00, 0.197234,  Bx,                9.883860e-01, -    4.981931e-17},     gamma,            primitiveScalar),
-            fastRareRightSide          = primitive2Conserved({0.299996, -0.000033,    1.855120e-05, 1.000000e+00, 0.199995,  Bx,                9.999865e-01,      1.737190e-16},     gamma,            primitiveScalar),
-            rightICs                   = primitive2Conserved({0.3,       0.0,         0.0,          1.0,          0.2,       Bx,                1.0,               0.0},              gamma,            primitiveScalar);
-
-        for (size_t direction = 0; direction < 3; direction++)
-        {
-            // Initial Condition Checks
-            {
-                std::string const outputString {"Left State:  Left Ryu & Jones 4d state\n"
-                                                "Right State: Left Ryu & Jones 4d state\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0, 0.75499999999999989, 0, 0, 2.2204460492503131e-16, 0.0, 0, 0};
-                std::vector<Real> const scalarFlux{0,0,0};
-                Real thermalEnergyFlux = 0.0;
-                std::vector<Real> const testFluxes = computeFluxes(leftICs,
-                                                                   leftICs,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right Ryu & Jones 4d state\n"
-                                                "Right State: Right Ryu & Jones 4d state\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{-5.5511151231257827e-17, 0.45500000000000013, -0.69999999999999996, -5.5511151231257827e-17, 0, 0.0, 0, -0.69999999999999996};
-                std::vector<Real> const scalarFlux{-6.1450707278254418e-17, -1.2371317869019906e-16, -1.8404800947169341e-16};
-                Real thermalEnergyFlux = 0.0;
-                std::vector<Real> const testFluxes = computeFluxes(rightICs,
-                                                                   rightICs,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Left Ryu & Jones 4d state\n"
-                                                "Right State: Right Ryu & Jones 4d state\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.092428729855986602, 0.53311593977445149, -0.39622049648437296, -0.21566989083797167, -0.13287876964320211, 0.0, -0.40407579574102892, -0.21994567048141428};
-                std::vector<Real> const scalarFlux{0.10231837561464294, 0.20598837745492582, 0.30644876517012837};
-                Real thermalEnergyFlux = 0.13864309478397996;
-                std::vector<Real> const testFluxes = computeFluxes(leftICs,
-                                                                   rightICs,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right Ryu & Jones 4d state\n"
-                                                "Right State: Left Ryu & Jones 4d state\n"
-                                                "HLLD State: Right Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{-0.092428729855986602, 0.53311593977445149, -0.39622049648437296, 0.21566989083797167, 0.13287876964320211, 0.0, 0.40407579574102892, -0.21994567048141428};
-                std::vector<Real> const scalarFlux{-0.10231837561464294, -0.20598837745492582, -0.30644876517012837};
-                Real thermalEnergyFlux = -0.13864309478397996;
-                std::vector<Real> const testFluxes = computeFluxes(rightICs,
-                                                                   leftICs,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-
-            // Cross wave checks
-            {
-                std::string const outputString {"Left State:  Left side of pure hydrodynamic rarefaction\n"
-                                                "Right State: Right side of pure hydrodynamic rarefaction\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.074035256375659553, 0.66054553664209648, -6.1597070943493028e-41, -2.9447391900433873e-41, 0.1776649658235645, 0.0, -6.3466063324344113e-41, -3.0340891384335242e-41};
-                std::vector<Real> const scalarFlux{0.081956845911157775, 0.16499634214430131, 0.24546494288869905};
-                Real thermalEnergyFlux = 0.11034221894046368;
-                std::vector<Real> const testFluxes = computeFluxes(hydroRareLeftSide,
-                                                                   hydroRareRightSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right side of pure hydrodynamic rarefaction\n"
-                                                "Right State: Left side of pure hydrodynamic rarefaction\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.013336890338886076, 0.74071279157971992, -6.1745213352160876e-41, -2.9474651270630147e-41, 0.033152482405470307, 0.0, 6.2022392844946449e-41, 2.9606965476795895e-41};
-                std::vector<Real> const scalarFlux{0.014763904657692993, 0.029722840565719184, 0.044218649135708464};
-                Real thermalEnergyFlux = 0.019189877201961154;
-                std::vector<Real> const testFluxes = computeFluxes(hydroRareRightSide,
-                                                                   hydroRareLeftSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Left of switch on slow shock\n"
-                                                "Right State: Right of switch on slow shock\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.19734622040826083, 0.47855039640569758, -0.3392293209655618, -0.18588204716255491, 0.10695446263054809, 0.0, -0.3558357543098733, -0.19525093130352045};
-                std::vector<Real> const scalarFlux{0.21846177846784187, 0.43980943806215089, 0.65430419361309078};
-                Real thermalEnergyFlux = 0.2840373040888583;
-                std::vector<Real> const testFluxes = computeFluxes(switchOnSlowShockLeftSide,
-                                                                   switchOnSlowShockRightSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right of switch on slow shock\n"
-                                                "Right State: Left of switch on slow shock\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.097593254768855386, 0.76483698872352757, -0.02036438492698419, -0.010747481940703562, 0.25327551496496836, 0.0, -0.002520109973016129, -0.00088262199017708799};
-                std::vector<Real> const scalarFlux{0.10803549193474633, 0.21749813322875222, 0.32357182079044206};
-                Real thermalEnergyFlux = 0.1100817647375162;
-                std::vector<Real> const testFluxes = computeFluxes(switchOnSlowShockRightSide,
-                                                                   switchOnSlowShockLeftSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Left of contact discontinuity\n"
-                                                "Right State: Right of contact discontinuity\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.2091677440314007, 0.5956612619664029, -0.29309091669513981, -0.16072556008504282, 0.19220050968424285, 0.0, -0.35226977371803297, -0.19316940226499904};
-                std::vector<Real> const scalarFlux{0.23154817591476573, 0.46615510432814616, 0.69349862290347741};
-                Real thermalEnergyFlux = 0.23702444986592192;
-                std::vector<Real> const testFluxes = computeFluxes(contactLeftSide,
-                                                                   contactRightSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right of contact discontinuity\n"
-                                                "Right State: Left of contact discontinuity\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.15801775068597168, 0.57916072367837657, -0.33437339604094024, -0.18336617461176744, 0.16789791355547545, 0.0, -0.3522739911439669, -0.19317084712861482};
-                std::vector<Real> const scalarFlux{0.17492525964231936, 0.35216128279157616, 0.52391009427617696};
-                Real thermalEnergyFlux = 0.23704936434506069;
-                std::vector<Real> const testFluxes = computeFluxes(contactRightSide,
-                                                                   contactLeftSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Left of slow shock\n"
-                                                "Right State: Right of slow shock\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.11744487326715558, 0.66868230621718128, -0.35832022960458892, -0.19650694834641164, 0.057880816021092185, 0.0, -0.37198011453582402, -0.20397277844271294};
-                std::vector<Real> const scalarFlux{0.13001118457092631, 0.26173981750473918, 0.38939014356639379};
-                Real thermalEnergyFlux = 0.1738058891582446;
-                std::vector<Real> const testFluxes = computeFluxes(slowShockLeftSide,
-                                                                   slowShockRightSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right of slow shock\n"
-                                                "Right State: Left of slow shock\n"
-                                                "HLLD State: Left Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.038440990187426027, 0.33776683678923869, -0.62583241538732792, -0.3437911783906169, -0.13471828103488348, 0.0, -0.15165427985881363, -0.082233932588833825};
-                std::vector<Real> const scalarFlux{0.042554081172858457, 0.085670301959209896, 0.12745164834795927};
-                Real thermalEnergyFlux = 0.038445630017261548;
-                std::vector<Real> const testFluxes = computeFluxes(slowShockRightSide,
-                                                                   slowShockLeftSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Left of rotation/Alfven wave\n"
-                                                "Right State: Right of rotation/Alfven wave\n"
-                                                "HLLD State: Right Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{-0.0052668366104996478, 0.44242247672452317, -0.60785196341731951, -0.33352435102145184, -0.21197843894720192, 0.0, -0.18030635192654354, -0.098381113757603278};
-                std::vector<Real> const scalarFlux{-0.0058303751166299484, -0.011737769516117116, -0.017462271505355991};
-                Real thermalEnergyFlux = -0.0052395622905745485;
-                std::vector<Real> const testFluxes = computeFluxes(rotationLeftSide,
-                                                                   rotationRightSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right of rotation/Alfven wave\n"
-                                                "Right State: Left of rotation/Alfven wave\n"
-                                                "HLLD State: Right Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{-0.005459628948343731, 0.4415038084184626, -0.69273580053867279, -0.0051834737482743809, -0.037389286119015486, 0.0, -0.026148289294373184, -0.69914753968916865};
-                std::vector<Real> const scalarFlux{-0.0060437957583491572, -0.012167430087241717, -0.018101477236719343};
-                Real thermalEnergyFlux = -0.0054536013916442853;
-                std::vector<Real> const testFluxes = computeFluxes(rotationRightSide,
-                                                                   rotationLeftSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Left of fast rarefaction\n"
-                                                "Right State: Right of fast rarefaction\n"
-                                                "HLLD State: Right Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{-0.0059354802028144249, 0.44075681881443612, -0.69194176811725872, -0.0059354802028144804, -0.040194357552219451, 0.0, -0.027710302430178135, -0.70000000000000007};
-                std::vector<Real> const scalarFlux{-0.0065705619215052757, -0.013227920997059845, -0.019679168822056604};
-                Real thermalEnergyFlux = -0.0059354109546219782;
-                std::vector<Real> const testFluxes = computeFluxes(fastRareLeftSide,
-                                                                   fastRareRightSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right of fast rarefaction\n"
-                                                "Right State: Left of fast rarefaction\n"
-                                                "HLLD State: Right Double Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{-3.0171858819483255e-05, 0.45503057873272706, -0.69998654276213712, -3.0171858819427744e-05, -0.00014827469339251387, 0.0, -8.2898844654399895e-05, -0.69999999999999984};
-                std::vector<Real> const scalarFlux{-3.340017317660794e-05, -6.7241562798797897e-05, -0.00010003522597924373};
-                Real thermalEnergyFlux = -3.000421709818028e-05;
-                std::vector<Real> const testFluxes = computeFluxes(fastRareRightSide,
-                                                                   fastRareLeftSide,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-        }
-    }
-    // =========================================================================
-
-    // =========================================================================
-    /*!
-    * \brief Test the HLLD Riemann Solver using various states and waves from
-    * the Einfeldt Strong Rarefaction (EFR)
-    *
-    */
-    TEST_F(tMHDCalculateHLLDFluxesCUDA,
-           EinfeldtStrongRarefactionCorrectInputExpectCorrectOutput)
-    {
-        // Constant Values
-        Real const gamma = 5./3.;
-        Real const V0 = 2.;
-        Real const Vy = 0.0;
-        Real const Vz = 0.0;
-        Real const Bx = 0.0;
-        Real const Bz = 0.0;
-
-        std::vector<Real> const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875};
-
-        // States
-        std::vector<Real> const                       // | Density | X-Velocity | Y-Velocity | Z-Velocity | Pressure | X-Magnetic Field | Y-Magnetic Field | Z-Magnetic Field | Adiabatic Index | Passive Scalars |
-            leftICs                = primitive2Conserved({1.0,      -V0,          Vy,          Vz,          0.45,      Bx,                0.5,               Bz},               gamma,            primitiveScalar),
-            leftRarefactionCenter  = primitive2Conserved({0.368580, -1.180830,    Vy,          Vz,          0.111253,  Bx,                0.183044,          Bz},               gamma,            primitiveScalar),
-            leftVxTurnOver         = primitive2Conserved({0.058814, -0.125475,    Vy,          Vz,          0.008819,  Bx,                0.029215,          Bz},               gamma,            primitiveScalar),
-            midPoint               = primitive2Conserved({0.034658,  0.000778,    Vy,          Vz,          0.006776,  Bx,                0.017333,          Bz},               gamma,            primitiveScalar),
-            rightVxTurnOver        = primitive2Conserved({0.062587,  0.152160,    Vy,          Vz,          0.009521,  Bx,                0.031576,          Bz},               gamma,            primitiveScalar),
-            rightRarefactionCenter = primitive2Conserved({0.316485,  1.073560,    Vy,          Vz,          0.089875,  Bx,                0.159366,          Bz},               gamma,            primitiveScalar),
-            rightICs               = primitive2Conserved({1.0,       V0,          Vy,          Vz,          0.45,      Bx,                0.5,               Bz},               gamma,            primitiveScalar);
-
-        for (size_t direction = 0; direction < 3; direction++)
-        {
-            // Initial Condition Checks
-            {
-                std::string const outputString {"Left State:  Left Einfeldt Strong Rarefaction state\n"
-                                                "Right State: Left Einfeldt Strong Rarefaction state\n"
-                                                "HLLD State: Right"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{-2, 4.5750000000000002, -0, -0, -6.75, 0.0, -1, -0};
-                std::vector<Real> const scalarFlux{-2.2139950592000002, -4.4572370036000004, -6.6310283749999996};
-                Real thermalEnergyFlux = -1.3499999999999996;
-                std::vector<Real> const testFluxes = computeFluxes(leftICs,
-                                                                   leftICs,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right Einfeldt Strong Rarefaction state\n"
-                                                "Right State: Right Einfeldt Strong Rarefaction state\n"
-                                                "HLLD State: Left"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{2, 4.5750000000000002, 0, 0, 6.75, 0.0, 1, 0};
-                std::vector<Real> const scalarFlux{2.2139950592000002, 4.4572370036000004, 6.6310283749999996};
-                Real thermalEnergyFlux = 1.3499999999999996;
-                std::vector<Real> const testFluxes = computeFluxes(rightICs,
-                                                                   rightICs,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Left Einfeldt Strong Rarefaction state\n"
-                                                "Right State: Right Einfeldt Strong Rarefaction state\n"
-                                                "HLLD State: Left Star"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0, -1.4249999999999998, -0, -0, 0, 0.0, 0, -0};
-                std::vector<Real> const scalarFlux{0,0,0};
-                Real thermalEnergyFlux = 0.0;
-                std::vector<Real> const testFluxes = computeFluxes(leftICs,
-                                                                   rightICs,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right Einfeldt Strong Rarefaction state\n"
-                                                "Right State: Left Einfeldt Strong Rarefaction state\n"
-                                                "HLLD State: Left Star"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0, 10.574999999999999, 0, 0, 0, 0.0, 0, 0};
-                std::vector<Real> const scalarFlux{0,0,0};
-                Real thermalEnergyFlux = 0.0;
-                std::vector<Real> const testFluxes = computeFluxes(rightICs,
-                                                                   leftICs,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-
-            // Intermediate state checks
-            {
-                std::string const outputString {"Left State:  Left Einfeldt Strong Rarefaction state\n"
-                                                "Right State: Left rarefaction center\n"
-                                                "HLLD State: Right"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{-0.43523032140000006, 0.64193857338676208, -0, -0, -0.67142479846795033, 0.0, -0.21614384652000002, -0};
-                std::vector<Real> const scalarFlux{-0.48179889059681413, -0.9699623468164007, -1.4430123054318851};
-                Real thermalEnergyFlux = -0.19705631998499995;
-                std::vector<Real> const testFluxes = computeFluxes(leftICs,
-                                                                   leftRarefactionCenter,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Left rarefaction center\n"
-                                                "Right State: Left Einfeldt Strong Rarefaction state\n"
-                                                "HLLD State: Right"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{-2, 4.5750000000000002, -0, -0, -6.75, 0.0, -1, -0};
-                std::vector<Real> const scalarFlux{-2.2139950592000002, -4.4572370036000004, -6.6310283749999996};
-                Real thermalEnergyFlux = -1.3499999999999996;
-                std::vector<Real> const testFluxes = computeFluxes(leftRarefactionCenter,
-                                                                   leftICs,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Left rarefaction center\n"
-                                                "Right State: Left Vx turnover point\n"
-                                                "HLLD State: Right Star"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{-0.023176056428381629, -2.0437812714100764e-05, 0, 0, -0.00098843768795337005, 0.0, -0.011512369309265979, 0};
-                std::vector<Real> const scalarFlux{-0.025655837212088663, -0.051650588155052128, -0.076840543898599858};
-                Real thermalEnergyFlux = -0.0052127803322822184;
-                std::vector<Real> const testFluxes = computeFluxes(leftRarefactionCenter,
-                                                                   leftVxTurnOver,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Left Vx turnover point\n"
-                                                "Right State: Left rarefaction center\n"
-                                                "HLLD State: Right Star"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{-0.43613091609689758, 0.64135749005731213, 0, 0, -0.67086080671260462, 0.0, -0.21659109937066717, 0};
-                std::vector<Real> const scalarFlux{-0.48279584670145054, -0.9719694288205295, -1.445998239926636};
-                Real thermalEnergyFlux = -0.19746407621898149;
-                std::vector<Real> const testFluxes = computeFluxes(leftVxTurnOver,
-                                                                   leftRarefactionCenter,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Left Vx turnover point\n"
-                                                "Right State: Midpoint\n"
-                                                "HLLD State: Right Star"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{-0.0011656375857387598, 0.0062355370788444902, 0, 0, -0.00055517615333601446, 0.0, -0.0005829533231464588, 0};
-                std::vector<Real> const scalarFlux{-0.0012903579278217153, -0.0025977614899708843, -0.0038646879530001054};
-                Real thermalEnergyFlux = -0.00034184143405415065;
-                std::vector<Real> const testFluxes = computeFluxes(leftVxTurnOver,
-                                                                   midPoint,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Midpoint\n"
-                                                "Right State: Left Vx turnover point\n"
-                                                "HLLD State: Right Star"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{-0.0068097924351817191, 0.010501781004354172, 0, 0, -0.0027509360975397175, 0.0, -0.0033826654536986789, 0};
-                std::vector<Real> const scalarFlux{-0.0075384234028349319, -0.015176429414463658, -0.022577963432775162};
-                Real thermalEnergyFlux = -0.001531664896602873;
-                std::vector<Real> const testFluxes = computeFluxes(midPoint,
-                                                                   leftVxTurnOver,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Midpoint\n"
-                                                "Right State: Right Vx turnover point\n"
-                                                "HLLD State: Left Star"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.0013952100758668729, 0.0061359407125797273, 0, 0, 0.00065984543596031629, 0.0, 0.00069776606396793105, 0};
-                std::vector<Real> const scalarFlux{ 0.001544494107257657, 0.0031093909889746947, 0.0046258388010795683};
-                Real thermalEnergyFlux = 0.00040916715364737997;
-                std::vector<Real> const testFluxes = computeFluxes(midPoint,
-                                                                   rightVxTurnOver,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right Vx turnover point\n"
-                                                "Right State: Midpoint\n"
-                                                "HLLD State: Left Star"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.0090024688079190333, 0.011769373146023688, 0, 0, 0.003725251767222792, 0.0, 0.0045418689996141555, 0};
-                std::vector<Real> const scalarFlux{0.0099657107306674268, 0.020063068547205749, 0.029847813055181766};
-                Real thermalEnergyFlux = 0.0020542406295284269;
-                std::vector<Real> const testFluxes = computeFluxes(rightVxTurnOver,
-                                                                   midPoint,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right Vx turnover point\n"
-                                                "Right State: Right rarefaction center\n"
-                                                "HLLD State: Left Star"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.023310393229073981, 0.0033086897645311728, 0, 0, 0.0034208520409618887, 0.0, 0.011760413130542123, 0};
-                std::vector<Real> const scalarFlux{0.025804547718589466, 0.051949973634547723, 0.077285939467198722};
-                Real thermalEnergyFlux = 0.0053191138878843835;
-                std::vector<Real> const testFluxes = computeFluxes(rightVxTurnOver,
-                                                                   rightRarefactionCenter,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right rarefaction center\n"
-                                                "Right State: Right Vx turnover point\n"
-                                                "HLLD State: Left Star"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.33914253809565298, 0.46770133685446141, 0, 0, 0.46453338019960133, 0.0, 0.17077520175095764, 0};
-                std::vector<Real> const scalarFlux{0.37542995185416178, 0.75581933514738364, 1.1244318966408966};
-                Real thermalEnergyFlux = 0.1444638874418068;
-                std::vector<Real> const testFluxes = computeFluxes(rightRarefactionCenter,
-                                                                   rightVxTurnOver,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right rarefaction center\n"
-                                                "Right State: Right Einfeldt Strong Rarefaction state\n"
-                                                "HLLD State: Left"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{0.33976563660000003, 0.46733255780629601, 0, 0, 0.46427650313257612, 0.0, 0.17108896296000001, 0};
-                std::vector<Real> const scalarFlux{0.37611972035917141, 0.75720798400261535, 1.1264977885722693};
-                Real thermalEnergyFlux = 0.14472930749999999;
-                std::vector<Real> const testFluxes = computeFluxes(rightRarefactionCenter,
-                                                                   rightICs,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Right Einfeldt Strong Rarefaction state\n"
-                                                "Right State: Right rarefaction center\n"
-                                                "HLLD State: Left"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{2, 4.5750000000000002, 0, 0, 6.75, 0.0, 1, 0};
-                std::vector<Real> const scalarFlux{2.2139950592000002, 4.4572370036000004, 6.6310283749999996};
-                Real thermalEnergyFlux = 1.3499999999999996;
-                std::vector<Real> const testFluxes = computeFluxes(rightICs,
-                                                                   rightRarefactionCenter,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-        }
-    }
-    // =========================================================================
-
-    // =========================================================================
-    /*!
-    * \brief Test the HLLD Riemann Solver with the degenerate state
-    *
-    */
-    TEST_F(tMHDCalculateHLLDFluxesCUDA,
-           DegenerateStateCorrectInputExpectCorrectOutput)
-    {
-        // Constant Values
-        Real const gamma = 5./3.;
-        std::vector<Real> const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875};
-
-        // State
-        std::vector<Real> const      // | Density | X-Velocity | Y-Velocity | Z-Velocity | Pressure | X-Magnetic Field | Y-Magnetic Field | Z-Magnetic Field | Adiabatic Index | Passive Scalars |
-            state = primitive2Conserved({1.0,       1.0,         1.0,         1.0,         1.0,       3.0E4,             1.0,               1.0},              gamma,            primitiveScalar);
-
-        std::vector<Real> const fiducialFlux{1, -449999997, -29999, -29999, -59994, 0.0, -29999, -29999};
-        std::vector<Real> const scalarFlux{1.1069975296000001, 2.2286185018000002, 3.3155141874999998};
-        Real thermalEnergyFlux = 1.5;
-        std::string const outputString {"Left State:  Degenerate state\n"
-                                        "Right State: Degenerate state\n"
-                                        "HLLD State: Left Double Star State"};
-
-        // Compute the fluxes and check for correctness
-        // Order of Fluxes is rho, vec(V), E, vec(B)
-        // If you run into issues with the energy try 0.001953125 instead.
-        // That's what I got when running the Athena solver on its own. Running
-        // the Athena solver with theses tests gave me -0.00080700946455175148
-        // though
-        for (size_t direction = 0; direction < 3; direction++)
-        {
-            std::vector<Real> const testFluxes = computeFluxes(state,
-                                                               state,
-                                                               gamma,
-                                                               direction);
-            checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-        }
-    }
-    // =========================================================================
-
-    // =========================================================================
-    /*!
-    * \brief Test the HLLD Riemann Solver with all zeroes
-    *
-    */
-    TEST_F(tMHDCalculateHLLDFluxesCUDA,
-           AllZeroesExpectAllZeroes)
-    {
-        // Constant Values
-        Real const gamma = 5./3.;
-
-        // State
-        size_t numElements = 8;
-        #ifdef SCALAR
-            numElements += 3;
-        #endif // SCALAR
-
-        std::vector<Real> const state(numElements, 0.0);
-        std::vector<Real> const fiducialFlux(8,0.0);
-        std::vector<Real> const scalarFlux(3,0.0);
-        Real thermalEnergyFlux = 0.0;
-
-        std::string const outputString {"Left State:  All zeroes\n"
-                                        "Right State: All zeroes\n"
-                                        "HLLD State: Right Star State"};
-
-        for (size_t direction = 0; direction < 3; direction++)
-        {
-            // Compute the fluxes and check for correctness
-            // Order of Fluxes is rho, vec(V), E, vec(B)
-            std::vector<Real> const testFluxes = computeFluxes(state,
-                                                               state,
-                                                               gamma,
-                                                               direction);
-            checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-        }
-    }
-    // =========================================================================
-
-    // =========================================================================
-    /*!
-    * \brief Test the HLLD Riemann Solver with negative pressure, energy, and
-      density.
-    *
-    */
-    TEST_F(tMHDCalculateHLLDFluxesCUDA,
-           UnphysicalValuesExpectAutomaticFix)
-    {
-        // Constant Values
-        Real const gamma                  = 5./3.;
-
-        // States
-        std::vector<Real>                // | Density | X-Momentum | Y-Momentum | Z-Momentum | Energy   | X-Magnetic Field | Y-Magnetic Field | Z-Magnetic Field | Adiabatic Index | Passive Scalars |
-            negativePressure              = { 1.0,      1.0,         1.0,         1.0,         1.5,       1.0,               1.0,               1.0},
-            negativeEnergy                = { 1.0,      1.0,         1.0,         1.0,        -(5-gamma), 1.0,               1.0,               1.0},
-            negativeDensity               = {-1.0,      1.0,         1.0,         1.0,         1.0,       1.0,               1.0,               1.0},
-            negativeDensityEnergyPressure = {-1.0,     -1.0,        -1.0,        -1.0,         -gamma,    1.0,               1.0,               1.0},
-            negativeDensityPressure       = {-1.0,      1.0,         1.0,         1.0,        -1.0,       1.0,               1.0,               1.0};
-
-        #ifdef SCALAR
-            std::vector<Real> const conservedScalar{1.1069975296, 2.2286185018, 3.3155141875};
-            negativePressure.insert(negativePressure.begin()+5, conservedScalar.begin(), conservedScalar.begin() + NSCALARS);
-            negativeEnergy.insert(negativeEnergy.begin()+5, conservedScalar.begin(), conservedScalar.begin() + NSCALARS);
-            negativeDensity.insert(negativeDensity.begin()+5, conservedScalar.begin(), conservedScalar.begin() + NSCALARS);
-            negativeDensityEnergyPressure.insert(negativeDensityEnergyPressure.begin()+5, conservedScalar.begin(), conservedScalar.begin() + NSCALARS);
-            negativeDensityPressure.insert(negativeDensityPressure.begin()+5, conservedScalar.begin(), conservedScalar.begin() + NSCALARS);
-        #endif  // SCALAR
-        #ifdef  DE
-            negativePressure.push_back(mhdUtils::computeThermalEnergy(negativePressure.at(4),negativePressure.at(0),negativePressure.at(1),negativePressure.at(2),negativePressure.at(3),negativePressure.at(5 + NSCALARS),negativePressure.at(6 + NSCALARS),negativePressure.at(7 + NSCALARS),gamma));
-            negativeEnergy.push_back(mhdUtils::computeThermalEnergy(negativeEnergy.at(4),negativeEnergy.at(0),negativeEnergy.at(1),negativeEnergy.at(2),negativeEnergy.at(3),negativeEnergy.at(5 + NSCALARS),negativeEnergy.at(6 + NSCALARS),negativeEnergy.at(7 + NSCALARS),gamma));
-            negativeDensity.push_back(mhdUtils::computeThermalEnergy(negativeDensity.at(4),negativeDensity.at(0),negativeDensity.at(1),negativeDensity.at(2),negativeDensity.at(3),negativeDensity.at(5 + NSCALARS),negativeDensity.at(6 + NSCALARS),negativeDensity.at(7 + NSCALARS),gamma));
-            negativeDensityEnergyPressure.push_back(mhdUtils::computeThermalEnergy(negativeDensityEnergyPressure.at(4),negativeDensityEnergyPressure.at(0),negativeDensityEnergyPressure.at(1),negativeDensityEnergyPressure.at(2),negativeDensityEnergyPressure.at(3),negativeDensityEnergyPressure.at(5 + NSCALARS),negativeDensityEnergyPressure.at(6 + NSCALARS),negativeDensityEnergyPressure.at(7 + NSCALARS),gamma));
-            negativeDensityPressure.push_back(mhdUtils::computeThermalEnergy(negativeDensityPressure.at(4),negativeDensityPressure.at(0),negativeDensityPressure.at(1),negativeDensityPressure.at(2),negativeDensityPressure.at(3),negativeDensityPressure.at(5 + NSCALARS),negativeDensityPressure.at(6 + NSCALARS),negativeDensityPressure.at(7 + NSCALARS),gamma));
-        #endif  //DE
-
-        for (size_t direction = 0; direction < 3; direction++)
-        {
-            {
-                std::string const outputString {"Left State:  Negative Pressure\n"
-                                                "Right State: Negative Pressure\n"
-                                                "HLLD State: Left Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{1, 1.5, 0, 0, -1.6254793235168146e-16, 0, 0, 0};
-                std::vector<Real> const scalarFlux{1.1069975296000001, 2.2286185018000002, 3.3155141874999998};
-                Real thermalEnergyFlux = -1.5;
-                std::vector<Real> const testFluxes = computeFluxes(negativePressure,
-                                                                   negativePressure,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Negative Energy\n"
-                                                "Right State: Negative Energy\n"
-                                                "HLLD State: Left Star State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{1, 1.5, 0, 0, -1.5, 0, 0, 0};
-                std::vector<Real> const scalarFlux{1.1069975296000001, 2.2286185018000002, 3.3155141874999998};
-                Real thermalEnergyFlux = -6.333333333333333;
-                std::vector<Real> const testFluxes = computeFluxes(negativeEnergy,
-                                                                   negativeEnergy,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Negative Density\n"
-                                                "Right State: Negative Density\n"
-                                                "HLLD State: Left State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{1, 1E+20, 1e+20, 1e+20, -5e+19, 0, 0, 0};
-                std::vector<Real> const scalarFlux{1.1069975296000002e+20, 2.2286185018000002e+20, 3.3155141874999997e+20};
-                Real thermalEnergyFlux = -1.5000000000000001e+40;
-                std::vector<Real> const testFluxes = computeFluxes(negativeDensity,
-                                                                   negativeDensity,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Negative Density, Energy, and Pressure\n"
-                                                "Right State: Negative Density, Energy, and Pressure\n"
-                                                "HLLD State: Right State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{-1, 1E+20, 1E+20, 1E+20, 1.5E+20, 0, 0, 0};
-                std::vector<Real> const scalarFlux{-1.1069975296000002e+20, -2.2286185018000002e+20, -3.3155141874999997e+20};
-                Real thermalEnergyFlux = 1.5000000000000001e+40;
-                std::vector<Real> const testFluxes = computeFluxes(negativeDensityEnergyPressure,
-                                                                   negativeDensityEnergyPressure,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-            {
-                std::string const outputString {"Left State:  Negative Density and Pressure\n"
-                                                "Right State: Negative Density and Pressure\n"
-                                                "HLLD State: Left State"};
-                // Compute the fluxes and check for correctness
-                // Order of Fluxes is rho, vec(V), E, vec(B)
-                std::vector<Real> const fiducialFlux{1, 1e+20, 1e+20, 1e+20, -1.5e+20, 0, 0, 0};
-                std::vector<Real> const scalarFlux{1.1069975296000002e+20, 2.2286185018000002e+20, 3.3155141874999997e+20};
-                Real thermalEnergyFlux = -1.5000000000000001e+40;
-                std::vector<Real> const testFluxes = computeFluxes(negativeDensityPressure,
-                                                                   negativeDensityPressure,
-                                                                   gamma,
-                                                                   direction);
-                checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
-            }
-        }
-    }
-    // =========================================================================
-
-    // =========================================================================
-    // End of integration tests for the entire HLLD solver. Unit tests are below
-    // =========================================================================
-
-    // =========================================================================
-    // Unit tests for the contents of the _hlldInternal namespace
-    // =========================================================================
-    /*!
-     * \brief A struct to hold some basic test values
-     *
-     */
-    namespace
-    {
-        struct testParams
-        {
-            // List of cases
-            std::vector<std::string> names{"Case 1", "Case 2"};
-
-            // Conserved Variables
-            double gamma = 5./3.;
-            std::valarray<double> densityL  {21.50306776645775  , 48.316634031589935};
-            std::valarray<double> densityR  {81.1217731762265   , 91.02955738853635};
-            std::valarray<double> momentumXL{38.504606872151484 , 18.984145880030045};
-            std::valarray<double> momentumXR{ 8.201811315045326 , 85.24863367778745};
-            std::valarray<double> momentumYL{ 7.1046427940455015, 33.76182584816693};
-            std::valarray<double> momentumYR{13.874767484202021 , 33.023492551299974};
-            std::valarray<double> momentumZL{32.25700338919422  , 89.52561861038686};
-            std::valarray<double> momentumZR{33.85305318830181  ,  8.664313303796256};
-            std::valarray<double> energyL   {65.75120838109942  , 38.461354599479826};
-            std::valarray<double> energyR   {18.88982523270516  , 83.65639784178894};
-            std::valarray<double> magneticXL{92.75101068883114  , 31.588767769990532};
-            std::valarray<double> magneticXR{93.66196246448985  , 84.3529879134052};
-            std::valarray<double> magneticYL{12.297499156516622 , 63.74471969570406};
-            std::valarray<double> magneticYR{84.9919141787549   , 35.910258841630984};
-            std::valarray<double> magneticZL{46.224045698787776 , 37.70326455170754};
-            std::valarray<double> magneticZR{34.852095153095384 , 24.052685003977757};
-            // Star States
-            std::valarray<double> densityStarL  {28.520995251761526 , 54.721668215064945};
-            std::valarray<double> densityStarR  {49.09069570738605  , 72.68000504460609};
-            std::valarray<double> momentumStarXL{48.96082367518151  , 97.15439466280228};
-            std::valarray<double> momentumStarXR{65.74705433463932  , 94.5689655974538};
-            std::valarray<double> momentumStarYL{44.910034185328996 , 78.60179936059853};
-            std::valarray<double> momentumStarYR{51.642522487399276 , 44.63864007208728};
-            std::valarray<double> momentumStarZL{39.78163555990428  , 63.01612978428839};
-            std::valarray<double> momentumStarZR{33.47900698769427  , 52.19410653341197};
-            std::valarray<double> energyStarL   { 6.579867455284738 , 30.45043664908369};
-            std::valarray<double> energyStarR   {90.44484278669114  , 61.33664731346812};
-            std::valarray<double> magneticStarXL{49.81491527582234  , 62.379765828560906};
-            std::valarray<double> magneticStarXR{67.77402751903804  , 64.62226739788758};
-            std::valarray<double> magneticStarYL{62.09348829143065  , 54.27916744403672};
-            std::valarray<double> magneticStarYR{26.835645069149873 , 98.97444628327318};
-            std::valarray<double> magneticStarZL{62.765890944643196 , 93.26765455509641};
-            std::valarray<double> magneticStarZR{ 7.430231695917344 , 10.696380763901459};
-            // Double Star State
-            std::valarray<double> momentumDoubleStarXL{75.42525315887075  , 83.87480678359029};
-            std::valarray<double> momentumDoubleStarYL{22.56132540660678  , 76.11074421934487};
-            std::valarray<double> momentumDoubleStarZL{27.83908778933224  , 28.577101567661465};
-            std::valarray<double> energyDoubleStar    {45.83202455707669  , 55.4553014145573};
-            std::valarray<double> magneticDoubleStarY {20.943239839455895 , 83.8514810487021};
-            std::valarray<double> magneticDoubleStarZ {83.3802438268807   , 80.36671251730783};
-            // Fluxes
-            std::valarray<double> densityFluxL     {12.939239309626116 , 81.71524586517073};
-            std::valarray<double> momentumFluxXL   {65.05481464917627  , 56.09885069707803};
-            std::valarray<double> momentumFluxYL   {73.67692845586782  ,  2.717246983403787};
-            std::valarray<double> momentumFluxZL   {16.873647595664387 , 39.70132983192873};
-            std::valarray<double> energyFluxL      {52.71888731972469  , 81.63926176158796};
-            std::valarray<double> magneticFluxXL   {67.7412464028116   , 42.85301340921149};
-            std::valarray<double> magneticFluxYL   {58.98928445415967  , 57.04344459221359};
-            std::valarray<double> magneticFluxZL   {29.976925743532302 , 97.73329827141359};
-            std::valarray<double> momentumStarFluxX{74.90125547448865  , 26.812722601652684};
-            std::valarray<double> momentumStarFluxY{16.989138610622945 , 48.349566649914976};
-            std::valarray<double> momentumStarFluxZ{38.541822734846185 , 61.22843961052538};
-            std::valarray<double> energyStarFlux   {19.095105176247017 , 45.43224973313112};
-            std::valarray<double> magneticStarFluxY{96.23964526624277  , 33.05337536594796};
-            std::valarray<double> magneticStarFluxZ{86.22516928268347  , 15.62102082410738};
-
-            // Derived/Primitive variables
-            std::valarray<double> velocityXL     = momentumXL / densityL;
-            std::valarray<double> velocityXR     = momentumXR / densityR;
-            std::valarray<double> velocityYL     = momentumYL / densityL;
-            std::valarray<double> velocityYR     = momentumYR / densityR;
-            std::valarray<double> velocityZL     = momentumZL / densityL;
-            std::valarray<double> velocityZR     = momentumZR / densityR;
-            std::valarray<double> totalPressureStarL{66.80958736783934  , 72.29644038317676};
-            std::vector<double> gasPressureL;
-            std::vector<double> gasPressureR;
-            std::vector<double> totalPressureL;
-            std::vector<double> totalPressureR;
-            // Star State
-            std::valarray<double> velocityStarXL = momentumStarXL / densityStarL;
-            std::valarray<double> velocityStarXR = momentumStarXR / densityStarR;
-            std::valarray<double> velocityStarYL = momentumStarYL / densityStarL;
-            std::valarray<double> velocityStarYR = momentumStarYR / densityStarR;
-            std::valarray<double> velocityStarZL = momentumStarZL / densityStarL;
-            std::valarray<double> velocityStarZR = momentumStarZR / densityStarR;
-            // Double Star State
-            std::valarray<double> velocityDoubleStarXL = momentumDoubleStarXL / densityStarL;
-            std::valarray<double> velocityDoubleStarYL = momentumDoubleStarYL / densityStarL;
-            std::valarray<double> velocityDoubleStarZL = momentumDoubleStarZL / densityStarL;
-            // Other
-            std::valarray<double> speedM            {68.68021569453585  , 70.08236749169825};
-            std::valarray<double> speedSide         {70.37512772923496  ,  3.6579130085113265};
-            testParams()
-            {
-                for (size_t i = 0; i < names.size(); i++)
-                {
-                    gasPressureL.push_back(mhdUtils::computeGasPressure(energyL[i], densityL[i], momentumXL[i], momentumYL[i], momentumZL[i], magneticXL[i], magneticYL[i], magneticZL[i], gamma));
-                    gasPressureR.push_back(mhdUtils::computeGasPressure(energyR[i], densityR[i], momentumXR[i], momentumYR[i], momentumZR[i], magneticXR[i], magneticYR[i], magneticZR[i], gamma));
-                    totalPressureL.push_back(mhdUtils::computeTotalPressure(gasPressureL.back(), magneticXL[i], magneticYL[i], magneticZL[i]));
-                    totalPressureR.push_back(mhdUtils::computeTotalPressure(gasPressureL.back(), magneticXR[i], magneticYR[i], magneticZR[i]));
-                }
-            }
-        };
-    }
-    // =========================================================================
-
-    // =========================================================================
-    /*!
-     * \brief Test the _hlldInternal::_approximateWaveSpeeds function
-     *
-     */
-    TEST(tMHDHlldInternalApproximateWaveSpeeds,
-         CorrectInputExpectCorrectOutput)
-    {
-        testParams const parameters;
-        std::vector<double> const fiducialSpeedL      {-22.40376497145191, -11.190385012513822};
-        std::vector<double> const fiducialSpeedR      {24.295526347371595, 12.519790189404299};
-        std::vector<double> const fiducialSpeedM      {-0.81760587897407833, -0.026643804611559244};
-        std::vector<double> const fiducialSpeedStarL  {-19.710500632936679, -4.4880642018724357};
-        std::vector<double> const fiducialSpeedStarR  {9.777062240423124, 9.17474383484066};
-        std::vector<double> const fiducialDensityStarL{24.101290139122913, 50.132466596958501};
-        std::vector<double> const fiducialDensityStarR{78.154104734671265, 84.041595114910123};
-
-        double testSpeedL = 0;
-        double testSpeedR = 0;
-        double testSpeedM = 0;
-        double testSpeedStarL = 0;
-        double testSpeedStarR = 0;
-        double testDensityStarL = 0;
-        double testDensityStarR = 0;
-
-        for (size_t i = 0; i < parameters.names.size(); i++)
-        {
-            _hlldInternal::_approximateWaveSpeeds(parameters.densityL[i],
-                                                  parameters.momentumXL[i],
-                                                  parameters.momentumYL[i],
-                                                  parameters.momentumZL[i],
-                                                  parameters.velocityXL[i],
-                                                  parameters.velocityYL[i],
-                                                  parameters.velocityZL[i],
-                                                  parameters.gasPressureL[i],
-                                                  parameters.totalPressureL[i],
-                                                  parameters.magneticXL[i],
-                                                  parameters.magneticYL[i],
-                                                  parameters.magneticZL[i],
-                                                  parameters.densityR[i],
-                                                  parameters.momentumXR[i],
-                                                  parameters.momentumYR[i],
-                                                  parameters.momentumZR[i],
-                                                  parameters.velocityXR[i],
-                                                  parameters.velocityYR[i],
-                                                  parameters.velocityZR[i],
-                                                  parameters.gasPressureR[i],
-                                                  parameters.totalPressureR[i],
-                                                  parameters.magneticXR[i],
-                                                  parameters.magneticYR[i],
-                                                  parameters.magneticZR[i],
-                                                  parameters.gamma,
-                                                  testSpeedL,
-                                                  testSpeedR,
-                                                  testSpeedM,
-                                                  testSpeedStarL,
-                                                  testSpeedStarR,
-                                                  testDensityStarL,
-                                                  testDensityStarR);
-            // Now check results
-            testingUtilities::checkResults(fiducialSpeedL[i],
-                                           testSpeedL,
-                                           parameters.names.at(i) + ", SpeedL");
-            testingUtilities::checkResults(fiducialSpeedR.at(i),
-                                           testSpeedR,
-                                           parameters.names.at(i) + ", SpeedR");
-            testingUtilities::checkResults(fiducialSpeedM.at(i),
-                                           testSpeedM,
-                                           parameters.names.at(i) + ", SpeedM");
-            testingUtilities::checkResults(fiducialSpeedStarL.at(i),
-                                           testSpeedStarL,
-                                           parameters.names.at(i) + ", SpeedStarL");
-            testingUtilities::checkResults(fiducialSpeedStarR.at(i),
-                                           testSpeedStarR,
-                                           parameters.names.at(i) + ", SpeedStarR");
-            testingUtilities::checkResults(fiducialDensityStarL.at(i),
-                                           testDensityStarL,
-                                           parameters.names.at(i) + ", DensityStarL");
-            testingUtilities::checkResults(fiducialDensityStarR.at(i),
-                                           testDensityStarR,
-                                           parameters.names.at(i) + ", DensityStarR");
-        }
-    }
-    // =========================================================================
-
-    // =========================================================================
-    /*!
-     * \brief Test the _hlldInternal::_starFluxes function in the non-degenerate
-     * case
-     *
-     */
-     TEST(tMHDHlldInternalStarFluxes,
-          CorrectInputNonDegenerateExpectCorrectOutput)
-    {
-        testParams const parameters;
-
-        std::vector<double> const fiducialVelocityStarY    {12.831290892281075, 12.92610185957192};
-        std::vector<double> const fiducialVelocityStarZ    {48.488664548015286, 9.0850326944201107};
-        std::vector<double> const fiducialEnergyStar       {1654897.6912410262, 956.83439334487116};
-        std::vector<double> const fiducialMagneticStarY    {-186.47142421374559, 2.6815421494204679};
-        std::vector<double> const fiducialMagneticStarZ    {-700.91191100481922, 1.5860591049546646};
-        std::vector<double> const fiducialDensityStarFlux  {506.82678248238807, 105.14430372486369};
-        std::vector<double> const fiducialMomentumStarFluxX{135208.06632708258, 14014.840899433098};
-        std::vector<double> const fiducialMomentumStarFluxY{25328.25203616685, 2466.5997745560339};
-        std::vector<double> const fiducialMomentumStarFluxZ{95071.711914347878, 1530.7490710422007};
-        std::vector<double> const fiducialEnergyStarFlux   {116459061.8691024, 3440.9679468544314};
-        std::vector<double> const fiducialMagneticStarFluxY{-13929.399086330559, -166.32034689537392};
-        std::vector<double> const fiducialMagneticStarFluxZ{-52549.811458376971, -34.380297363339892};
-
-        double testVelocityStarY;
-        double testVelocityStarZ;
-        double testEnergyStar;
-        double testMagneticStarY;
-        double testMagneticStarZ;
-        double testDensityStarFlux;
-        double testMomentumStarFluxX;
-        double testMomentumStarFluxY;
-        double testMomentumStarFluxZ;
-        double testEnergyStarFlux;
-        double testMagneticStarFluxY;
-        double testMagneticStarFluxZ;
-
-        for (size_t i = 0; i < parameters.names.size(); i++)
-        {
-            _hlldInternal::_starFluxes(parameters.speedM[i],
-                                       parameters.speedSide[i],
-                                       parameters.densityL[i],
-                                       parameters.velocityXL[i],
-                                       parameters.velocityYL[i],
-                                       parameters.velocityZL[i],
-                                       parameters.momentumXL[i],
-                                       parameters.momentumYL[i],
-                                       parameters.momentumZL[i],
-                                       parameters.energyL[i],
-                                       parameters.totalPressureL[i],
-                                       parameters.magneticXL[i],
-                                       parameters.magneticYL[i],
-                                       parameters.magneticZL[i],
-                                       parameters.densityStarL[i],
-                                       parameters.totalPressureStarL[i],
-                                       parameters.densityFluxL[i],
-                                       parameters.momentumFluxXL[i],
-                                       parameters.momentumFluxYL[i],
-                                       parameters.momentumFluxZL[i],
-                                       parameters.energyFluxL[i],
-                                       parameters.magneticFluxYL[i],
-                                       parameters.magneticFluxZL[i],
-                                       testVelocityStarY,
-                                       testVelocityStarZ,
-                                       testEnergyStar,
-                                       testMagneticStarY,
-                                       testMagneticStarZ,
-                                       testDensityStarFlux,
-                                       testMomentumStarFluxX,
-                                       testMomentumStarFluxY,
-                                       testMomentumStarFluxZ,
-                                       testEnergyStarFlux,
-                                       testMagneticStarFluxY,
-                                       testMagneticStarFluxZ);
-
-            // Now check results
-            testingUtilities::checkResults(fiducialVelocityStarY[i],
-                                            testVelocityStarY,
-                                            parameters.names.at(i) + ", VelocityStarY");
-            testingUtilities::checkResults(fiducialVelocityStarZ[i],
-                                            testVelocityStarZ,
-                                            parameters.names.at(i) + ", VelocityStarZ");
-            testingUtilities::checkResults(fiducialEnergyStar[i],
-                                           testEnergyStar,
-                                           parameters.names.at(i) + ", EnergyStar");
-            testingUtilities::checkResults(fiducialMagneticStarY[i],
-                                            testMagneticStarY,
-                                            parameters.names.at(i) + ", MagneticStarY");
-            testingUtilities::checkResults(fiducialMagneticStarZ[i],
-                                            testMagneticStarZ,
-                                            parameters.names.at(i) + ", MagneticStarZ");
-            testingUtilities::checkResults(fiducialDensityStarFlux[i],
-                                            testDensityStarFlux,
-                                            parameters.names.at(i) + ", DensityStarFlux");
-            testingUtilities::checkResults(fiducialMomentumStarFluxX[i],
-                                            testMomentumStarFluxX,
-                                            parameters.names.at(i) + ", MomentumStarFluxX");
-            testingUtilities::checkResults(fiducialMomentumStarFluxY[i],
-                                            testMomentumStarFluxY,
-                                            parameters.names.at(i) + ", MomentumStarFluxY");
-            testingUtilities::checkResults(fiducialMomentumStarFluxZ[i],
-                                            testMomentumStarFluxZ,
-                                            parameters.names.at(i) + ", MomentumStarFluxZ");
-            testingUtilities::checkResults(fiducialEnergyStarFlux[i],
-                                            testEnergyStarFlux,
-                                            parameters.names.at(i) + ", EnergyStarFlux");
-            testingUtilities::checkResults(fiducialMagneticStarFluxY[i],
-                                            testMagneticStarFluxY,
-                                            parameters.names.at(i) + ", MagneticStarFluxY");
-            testingUtilities::checkResults(fiducialMagneticStarFluxZ[i],
-                                            testMagneticStarFluxZ,
-                                            parameters.names.at(i) + ", MagneticStarFluxZ");
-        }
-    }
-
-    /*!
-     * \brief Test the _hlldInternal::_starFluxes function in the degenerate
-     * case
-     *
-     */
-     TEST(tMHDHlldInternalStarFluxes,
-          CorrectInputDegenerateExpectCorrectOutput)
-    {
-        testParams const parameters;
-
-        // Used to get us into the degenerate case
-        double const totalPressureStarMultiplier = 1E15;
-
-        std::vector<double> const fiducialVelocityStarY    {0.33040135813215948, 0.69876195899931859};
-        std::vector<double> const fiducialVelocityStarZ    {1.500111692877206, 1.8528943583250035};
-        std::vector<double> const fiducialEnergyStar       {2.7072182962581443e+18, -76277716432851392};
-        std::vector<double> const fiducialMagneticStarY    {12.297499156516622, 63.744719695704063};
-        std::vector<double> const fiducialMagneticStarZ    {46.224045698787776, 37.703264551707541};
-        std::vector<double> const fiducialDensityStarFlux  {506.82678248238807, 105.14430372486369};
-        std::vector<double> const fiducialMomentumStarFluxX{135208.06632708258, 14014.840899433098};
-        std::vector<double> const fiducialMomentumStarFluxY{236.85804348470396, 19.08858135095122};
-        std::vector<double> const fiducialMomentumStarFluxZ{757.76012607552047, 83.112898961023902};
-        std::vector<double> const fiducialEnergyStarFlux   {1.9052083339008875e+20, -2.7901725119926531e+17};
-        std::vector<double> const fiducialMagneticStarFluxY{58.989284454159673, 57.043444592213589};
-        std::vector<double> const fiducialMagneticStarFluxZ{29.976925743532302, 97.733298271413588};
-
-        double testVelocityStarY;
-        double testVelocityStarZ;
-        double testEnergyStar;
-        double testMagneticStarY;
-        double testMagneticStarZ;
-        double testDensityStarFlux;
-        double testMomentumStarFluxX;
-        double testMomentumStarFluxY;
-        double testMomentumStarFluxZ;
-        double testEnergyStarFlux;
-        double testMagneticStarFluxY;
-        double testMagneticStarFluxZ;
-
-        for (size_t i = 0; i < parameters.names.size(); i++)
-        {
-            _hlldInternal::_starFluxes(parameters.speedM[i],
-                                        parameters.speedSide[i],
-                                        parameters.densityL[i],
-                                        parameters.velocityXL[i],
-                                        parameters.velocityYL[i],
-                                        parameters.velocityZL[i],
-                                        parameters.momentumXL[i],
-                                        parameters.momentumYL[i],
-                                        parameters.momentumZL[i],
-                                        parameters.energyL[i],
-                                        parameters.totalPressureL[i],
-                                        parameters.magneticXL[i],
-                                        parameters.magneticYL[i],
-                                        parameters.magneticZL[i],
-                                        parameters.densityStarL[i],
-                                        parameters.totalPressureStarL[i] * totalPressureStarMultiplier,
-                                        parameters.densityFluxL[i],
-                                        parameters.momentumFluxXL[i],
-                                        parameters.momentumFluxYL[i],
-                                        parameters.momentumFluxZL[i],
-                                        parameters.energyFluxL[i],
-                                        parameters.magneticFluxYL[i],
-                                        parameters.magneticFluxZL[i],
-                                        testVelocityStarY,
-                                        testVelocityStarZ,
-                                        testEnergyStar,
-                                        testMagneticStarY,
-                                        testMagneticStarZ,
-                                        testDensityStarFlux,
-                                        testMomentumStarFluxX,
-                                        testMomentumStarFluxY,
-                                        testMomentumStarFluxZ,
-                                        testEnergyStarFlux,
-                                        testMagneticStarFluxY,
-                                        testMagneticStarFluxZ);
-
-            // Now check results
-            testingUtilities::checkResults(fiducialVelocityStarY[i],
-                                            testVelocityStarY,
-                                            parameters.names.at(i) + ", VelocityStarY");
-            testingUtilities::checkResults(fiducialVelocityStarZ[i],
-                                            testVelocityStarZ,
-                                            parameters.names.at(i) + ", VelocityStarZ");
-            testingUtilities::checkResults(fiducialEnergyStar[i],
-                                            testEnergyStar,
-                                            parameters.names.at(i) + ", EnergyStar");
-            testingUtilities::checkResults(fiducialMagneticStarY[i],
-                                            testMagneticStarY,
-                                            parameters.names.at(i) + ", MagneticStarY");
-            testingUtilities::checkResults(fiducialMagneticStarZ[i],
-                                            testMagneticStarZ,
-                                            parameters.names.at(i) + ", MagneticStarZ");
-            testingUtilities::checkResults(fiducialDensityStarFlux[i],
-                                            testDensityStarFlux,
-                                            parameters.names.at(i) + ", DensityStarFlux");
-            testingUtilities::checkResults(fiducialMomentumStarFluxX[i],
-                                            testMomentumStarFluxX,
-                                            parameters.names.at(i) + ", MomentumStarFluxX");
-            testingUtilities::checkResults(fiducialMomentumStarFluxY[i],
-                                            testMomentumStarFluxY,
-                                            parameters.names.at(i) + ", MomentumStarFluxY");
-            testingUtilities::checkResults(fiducialMomentumStarFluxZ[i],
-                                            testMomentumStarFluxZ,
-                                            parameters.names.at(i) + ", MomentumStarFluxZ");
-            testingUtilities::checkResults(fiducialEnergyStarFlux[i],
-                                            testEnergyStarFlux,
-                                            parameters.names.at(i) + ", EnergyStarFlux");
-            testingUtilities::checkResults(fiducialMagneticStarFluxY[i],
-                                            testMagneticStarFluxY,
-                                            parameters.names.at(i) + ", MagneticStarFluxY");
-            testingUtilities::checkResults(fiducialMagneticStarFluxZ[i],
-                                            testMagneticStarFluxZ,
-                                            parameters.names.at(i) + ", MagneticStarFluxZ");
-        }
-    }
-    // =========================================================================
-
-    // =========================================================================
-    /*!
-     * \brief Test the _hlldInternal::_nonStarFluxes function
-     *
-     */
-    TEST(tMHDHlldInternalNonStarFluxes,
-         CorrectInputExpectCorrectOutput)
-    {
-        testParams const parameters;
-
-        std::vector<double> const fiducialDensityFlux  {38.504606872151484, 18.984145880030045};
-        std::vector<double> const fiducialMomentumFluxX{-3088.4810263278778, 2250.9966820900618};
-        std::vector<double> const fiducialMomentumFluxY{-1127.8835013070616, -2000.3517480656785};
-        std::vector<double> const fiducialMomentumFluxZ{-4229.5657456907293, -1155.8240512956793};
-        std::vector<double> const fiducialMagneticFluxY{-8.6244637840856555, 2.9729840344910059};
-        std::vector<double> const fiducialMagneticFluxZ{-56.365490339906408, -43.716615275067923};
-        std::vector<double> const fiducialEnergyFlux   {-12344.460641662206, -2717.2127176227905};
-
-        double testDensityFlux;
-        double testMomentumFluxX;
-        double testMomentumFluxY;
-        double testMomentumFluxZ;
-        double testMagneticFluxY;
-        double testMagneticFluxZ;
-        double testEnergyFlux;
-
-        for (size_t i = 0; i < parameters.names.size(); i++)
-        {
-            _hlldInternal::_nonStarFluxes(parameters.momentumXL[i],
-                                          parameters.velocityXL[i],
-                                          parameters.velocityYL[i],
-                                          parameters.velocityZL[i],
-                                          parameters.totalPressureL[i],
-                                          parameters.energyL[i],
-                                          parameters.magneticXL[i],
-                                          parameters.magneticYL[i],
-                                          parameters.magneticZL[i],
-                                          testDensityFlux,
-                                          testMomentumFluxX,
-                                          testMomentumFluxY,
-                                          testMomentumFluxZ,
-                                          testMagneticFluxY,
-                                          testMagneticFluxZ,
-                                          testEnergyFlux);
-
-            // Now check results
-            testingUtilities::checkResults(fiducialDensityFlux[i],
-                                            testDensityFlux,
-                                            parameters.names.at(i) + ", DensityFlux");
-            testingUtilities::checkResults(fiducialMomentumFluxX[i],
-                                           testMomentumFluxX,
-                                           parameters.names.at(i) + ", MomentumFluxX");
-            testingUtilities::checkResults(fiducialMomentumFluxY[i],
-                                           testMomentumFluxY,
-                                           parameters.names.at(i) + ", MomentumFluxY");
-            testingUtilities::checkResults(fiducialMomentumFluxZ[i],
-                                           testMomentumFluxZ,
-                                           parameters.names.at(i) + ", MomentumFluxZ");
-            testingUtilities::checkResults(fiducialMagneticFluxY[i],
-                                           testMagneticFluxY,
-                                           parameters.names.at(i) + ", MagneticFluxY");
-            testingUtilities::checkResults(fiducialMagneticFluxZ[i],
-                                           testMagneticFluxZ,
-                                           parameters.names.at(i) + ", MagneticFluxZ");
-            testingUtilities::checkResults(fiducialEnergyFlux[i],
-                                           testEnergyFlux,
-                                           parameters.names.at(i) + ", EnergyFlux");
-        }
-    }
-    // =========================================================================
-
-    // =========================================================================
-    /*!
-     * \brief Test the _hlldInternal::_dotProduct function
-     *
-     */
-    TEST(tMHDHlldInternalDotProduct,
-         CorrectInputExpectCorrectOutput)
-    {
-        testParams const parameters;
-
-        std::vector<double> const fiducialDotProduct{5149.7597411033557,6127.2319832451567};
-
-        double testDotProduct;
-
-        for (size_t i = 0; i < parameters.names.size(); i++)
-        {
-            testDotProduct = _hlldInternal::_dotProduct(parameters.momentumXL[i],
-                                                        parameters.momentumYL[i],
-                                                        parameters.momentumZL[i],
-                                                        parameters.magneticXL[i],
-                                                        parameters.magneticYL[i],
-                                                        parameters.magneticZL[i]);
-
-            // Now check results
-            testingUtilities::checkResults(fiducialDotProduct[i],
-                                           testDotProduct,
-                                           parameters.names.at(i) + ", DotProduct");
-            }
-    }
-    // =========================================================================
-
-    // =========================================================================
-    /*!
-     * \brief Test the _hlldInternal::_doubleStarState function. Non-degenerate
-     * state
-     *
-    */
-    TEST(tMHDHlldInternalDoubleStarState,
-         CorrectInputNonDegenerateExpectCorrectOutput)
-    {
-        testParams const parameters;
-
-        double const fixedEpsilon = 7E-12;
-
-        std::vector<double> const fiducialVelocityDoubleStarY{-1.5775383335759607, 3.803188977150934};
-        std::vector<double> const fiducialVelocityDoubleStarZ{-3.4914062207842482, -4.2662645349592765};
-        std::vector<double> const fiducialMagneticDoubleStarY{45.259313435283325, 71.787329583230417};
-        std::vector<double> const fiducialMagneticDoubleStarZ{36.670978215630669, 53.189673238238178};
-        std::vector<double> const fiducialEnergyDoubleStarL  {-2048.1953674500514, -999.79694164635089};
-        std::vector<double> const fiducialEnergyDoubleStarR  {1721.0582276783764, 252.04716752257781};
-
-        double testVelocityDoubleStarY;
-        double testVelocityDoubleStarZ;
-        double testMagneticDoubleStarY;
-        double testMagneticDoubleStarZ;
-        double testEnergyDoubleStarL;
-        double testEnergyDoubleStarR;
-
-        for (size_t i = 0; i < parameters.names.size(); i++)
-        {
-            _hlldInternal::_doubleStarState(parameters.speedM[i],
-                                            parameters.magneticXL[i],
-                                            parameters.totalPressureStarL[i],
-                                            parameters.densityStarL[i],
-                                            parameters.velocityStarYL[i],
-                                            parameters.velocityStarZL[i],
-                                            parameters.energyStarL[i],
-                                            parameters.magneticStarYL[i],
-                                            parameters.magneticStarZL[i],
-                                            parameters.densityStarR[i],
-                                            parameters.velocityStarYR[i],
-                                            parameters.velocityStarZR[i],
-                                            parameters.energyStarR[i],
-                                            parameters.magneticStarYR[i],
-                                            parameters.magneticStarZR[i],
-                                            testVelocityDoubleStarY,
-                                            testVelocityDoubleStarZ,
-                                            testMagneticDoubleStarY,
-                                            testMagneticDoubleStarZ,
-                                            testEnergyDoubleStarL,
-                                            testEnergyDoubleStarR);
-
-            // Now check results
-            testingUtilities::checkResults(fiducialVelocityDoubleStarY[i],
-                                           testVelocityDoubleStarY,
-                                           parameters.names.at(i) + ", VelocityDoubleStarY");
-            testingUtilities::checkResults(fiducialVelocityDoubleStarZ[i],
-                                           testVelocityDoubleStarZ,
-                                           parameters.names.at(i) + ", VelocityDoubleStarZ");
-            testingUtilities::checkResults(fiducialMagneticDoubleStarY[i],
-                                           testMagneticDoubleStarY,
-                                           parameters.names.at(i) + ", MagneticDoubleStarY");
-            testingUtilities::checkResults(fiducialMagneticDoubleStarZ[i],
-                                           testMagneticDoubleStarZ,
-                                           parameters.names.at(i) + ", MagneticDoubleStarZ");
-            testingUtilities::checkResults(fiducialEnergyDoubleStarL[i],
-                                           testEnergyDoubleStarL,
-                                           parameters.names.at(i) + ", EnergyDoubleStarL");
-            testingUtilities::checkResults(fiducialEnergyDoubleStarR[i],
-                                           testEnergyDoubleStarR,
-                                           parameters.names.at(i) + ", EnergyDoubleStarR",
-                                           fixedEpsilon);
-        }
-    }
-
-    /*!
-     * \brief Test the _hlldInternal::_doubleStarState function in the
-     * degenerate state.
-     *
-    */
-    TEST(tMHDHlldInternalDoubleStarState,
-         CorrectInputDegenerateExpectCorrectOutput)
-    {
-        testParams const parameters;
-
-        std::vector<double> const fiducialVelocityDoubleStarY{1.5746306813243216, 1.4363926014039052};
-        std::vector<double> const fiducialVelocityDoubleStarZ{1.3948193325212686, 1.1515754515491903};
-        std::vector<double> const fiducialMagneticDoubleStarY{62.093488291430653, 54.279167444036723};
-        std::vector<double> const fiducialMagneticDoubleStarZ{62.765890944643196, 93.267654555096414};
-        std::vector<double> const fiducialEnergyDoubleStarL  {6.579867455284738, 30.450436649083692};
-        std::vector<double> const fiducialEnergyDoubleStarR  {90.44484278669114, 61.33664731346812};
-
-        double testVelocityDoubleStarY;
-        double testVelocityDoubleStarZ;
-        double testMagneticDoubleStarY;
-        double testMagneticDoubleStarZ;
-        double testEnergyDoubleStarL;
-        double testEnergyDoubleStarR;
-
-        for (size_t i = 0; i < parameters.names.size(); i++)
-        {
-            _hlldInternal::_doubleStarState(parameters.speedM[i],
-                                            0.0,
-                                            parameters.totalPressureStarL[i],
-                                            parameters.densityStarL[i],
-                                            parameters.velocityStarYL[i],
-                                            parameters.velocityStarZL[i],
-                                            parameters.energyStarL[i],
-                                            parameters.magneticStarYL[i],
-                                            parameters.magneticStarZL[i],
-                                            parameters.densityStarR[i],
-                                            parameters.velocityStarYR[i],
-                                            parameters.velocityStarZR[i],
-                                            parameters.energyStarR[i],
-                                            parameters.magneticStarYR[i],
-                                            parameters.magneticStarZR[i],
-                                            testVelocityDoubleStarY,
-                                            testVelocityDoubleStarZ,
-                                            testMagneticDoubleStarY,
-                                            testMagneticDoubleStarZ,
-                                            testEnergyDoubleStarL,
-                                            testEnergyDoubleStarR);
-            // Now check results
-            testingUtilities::checkResults(fiducialVelocityDoubleStarY[i],
-                                            testVelocityDoubleStarY,
-                                            parameters.names.at(i) + ", VelocityDoubleStarY");
-            testingUtilities::checkResults(fiducialVelocityDoubleStarZ[i],
-                                            testVelocityDoubleStarZ,
-                                            parameters.names.at(i) + ", VelocityDoubleStarZ");
-            testingUtilities::checkResults(fiducialMagneticDoubleStarY[i],
-                                            testMagneticDoubleStarY,
-                                            parameters.names.at(i) + ", MagneticDoubleStarY");
-            testingUtilities::checkResults(fiducialMagneticDoubleStarZ[i],
-                                            testMagneticDoubleStarZ,
-                                            parameters.names.at(i) + ", MagneticDoubleStarZ");
-            testingUtilities::checkResults(fiducialEnergyDoubleStarL[i],
-                                            testEnergyDoubleStarL,
-                                            parameters.names.at(i) + ", EnergyDoubleStarL");
-            testingUtilities::checkResults(fiducialEnergyDoubleStarR[i],
-                                            testEnergyDoubleStarR,
-                                            parameters.names.at(i) + ", EnergyDoubleStarR");
-        }
-    }
-    // =========================================================================
-
-    // =========================================================================
-    /*!
-     * \brief Test the _hlldInternal::_doubleStarFluxes function
-     *
-     */
-    TEST(tMHDHlldInternalDoubleStarFluxes,
-         CorrectInputExpectCorrectOutput)
-    {
-        testParams const parameters;
-
-        std::vector<double> const fiducialMomentumDoubleStarFluxX{1937.3388606704509, -21.762854649386174};
-        std::vector<double> const fiducialMomentumDoubleStarFluxY{-1555.8040962754276, 39.237503643804175};
-        std::vector<double> const fiducialMomentumDoubleStarFluxZ{-801.91650203165148, -64.746529703562871};
-        std::vector<double> const fiducialEnergyDoubleStarFlux   {2781.4706748628528, 136.89786983482355};
-        std::vector<double> const fiducialMagneticDoubleStarFluxY{-2799.7143456312342, 141.2263259922299};
-        std::vector<double> const fiducialMagneticDoubleStarFluxZ{1536.9628864256708, -31.569502877970095};
-
-
-        double testMomentumDoubleStarFluxX;
-        double testMomentumDoubleStarFluxY;
-        double testMomentumDoubleStarFluxZ;
-        double testEnergyDoubleStarFlux;
-        double testMagneticDoubleStarFluxY;
-        double testMagneticDoubleStarFluxZ;
-
-        for (size_t i = 0; i < parameters.names.size(); i++)
-        {
-            _hlldInternal::_doubleStarFluxes(parameters.speedSide[i],
-                                             parameters.momentumStarFluxX[i],
-                                             parameters.momentumStarFluxY[i],
-                                             parameters.momentumStarFluxZ[i],
-                                             parameters.energyStarFlux[i],
-                                             parameters.magneticStarFluxY[i],
-                                             parameters.magneticStarFluxZ[i],
-                                             parameters.densityStarL[i],
-                                             parameters.velocityStarXL[i],
-                                             parameters.velocityStarYL[i],
-                                             parameters.velocityStarZL[i],
-                                             parameters.energyStarL[i],
-                                             parameters.magneticStarYL[i],
-                                             parameters.magneticStarZL[i],
-                                             parameters.velocityDoubleStarXL[i],
-                                             parameters.velocityDoubleStarYL[i],
-                                             parameters.velocityDoubleStarZL[i],
-                                             parameters.energyDoubleStar[i],
-                                             parameters.magneticDoubleStarY[i],
-                                             parameters.magneticDoubleStarZ[i],
-                                             testMomentumDoubleStarFluxX,
-                                             testMomentumDoubleStarFluxY,
-                                             testMomentumDoubleStarFluxZ,
-                                             testEnergyDoubleStarFlux,
-                                             testMagneticDoubleStarFluxY,
-                                             testMagneticDoubleStarFluxZ);
-
-            // Now check results
-            testingUtilities::checkResults(fiducialMomentumDoubleStarFluxX[i],
-                                           testMomentumDoubleStarFluxX,
-                                           parameters.names.at(i) + ", MomentumDoubleStarFluxX");
-            testingUtilities::checkResults(fiducialMomentumDoubleStarFluxY[i],
-                                           testMomentumDoubleStarFluxY,
-                                           parameters.names.at(i) + ", MomentumDoubleStarFluxY");
-            testingUtilities::checkResults(fiducialMomentumDoubleStarFluxZ[i],
-                                           testMomentumDoubleStarFluxZ,
-                                           parameters.names.at(i) + ", MomentumDoubleStarFluxZ");
-            testingUtilities::checkResults(fiducialEnergyDoubleStarFlux[i],
-                                           testEnergyDoubleStarFlux,
-                                           parameters.names.at(i) + ", EnergyDoubleStarFlux");
-            testingUtilities::checkResults(fiducialMagneticDoubleStarFluxY[i],
-                                           testMagneticDoubleStarFluxY,
-                                           parameters.names.at(i) + ", MagneticDoubleStarFluxY");
-            testingUtilities::checkResults(fiducialMagneticDoubleStarFluxZ[i],
-                                           testMagneticDoubleStarFluxZ,
-                                           parameters.names.at(i) + ", MagneticDoubleStarFluxZ");
-            }
-    }
-    // =========================================================================
-
-    // =========================================================================
-    /*!
-     * \brief Test the _hlldInternal::_returnFluxes function
-     *
-     */
-    TEST(tMHDHlldInternalReturnFluxes,
-         CorrectInputExpectCorrectOutput)
-    {
-        double const dummyValue    = 999;
-        double const densityFlux   = 1;
-        double const momentumFluxX = 2;
-        double const momentumFluxY = 3;
-        double const momentumFluxZ = 4;
-        double const energyFlux    = 5;
-        double const magneticFluxY = 6;
-        double const magneticFluxZ = 7;
-
-        int threadId = 0;
-        int n_cells = 10;
-        int nFields = 8;  // Total number of conserved fields
-        #ifdef  SCALAR
-            nFields += NSCALARS;
-        #endif  // SCALAR
-        #ifdef  DE
-            nFields++;
-        #endif  //DE
-
-        // Lambda for finding indices and check if they're correct
-        auto findIndex = [](std::vector<double> const &vec,
-                            double const &num,
-                            int const &fidIndex,
-                            std::string const &name)
-        {
-            int index = std::distance(vec.begin(), std::find(vec.begin(), vec.end(), num));
-            // EXPECT_EQ(fidIndex, index) << "Error in " << name << " index" << std::endl;
-
-            return index;
-        };
-
-        for (size_t direction = 0; direction < 3; direction++)
-        {
-            int o1, o2, o3;
-            if (direction==0) {o1 = 1; o2 = 2; o3 = 3;}
-            if (direction==1) {o1 = 2; o2 = 3; o3 = 1;}
-            if (direction==2) {o1 = 3; o2 = 1; o3 = 2;}
-
-            std::vector<double> testFluxArray(nFields*n_cells, dummyValue);
-
-            // Fiducial Indices
-            int const fiducialDensityIndex   = threadId;
-            int const fiducialMomentumIndexX = threadId + n_cells * o1;
-            int const fiducialMomentumIndexY = threadId + n_cells * o2;
-            int const fiducialMomentumIndexZ = threadId + n_cells * o3;
-            int const fiducialEnergyIndex    = threadId + n_cells * 4;
-            int const fiducialMagneticYIndex = threadId + n_cells * (o2 + 4 + NSCALARS);
-            int const fiducialMagneticZIndex = threadId + n_cells * (o3 + 4 + NSCALARS);
-
-            _hlldInternal::_returnFluxes(threadId,
-                                         o1,
-                                         o2,
-                                         o3,
-                                         n_cells,
-                                         testFluxArray.data(),
-                                         densityFlux,
-                                         momentumFluxX,
-                                         momentumFluxY,
-                                         momentumFluxZ,
-                                         energyFlux,
-                                         magneticFluxY,
-                                         magneticFluxZ);
-
-            // Find the indices for the various fields
-            int densityLoc    = findIndex(testFluxArray, densityFlux,   fiducialDensityIndex,   "density");
-            int momentumXLocX = findIndex(testFluxArray, momentumFluxX, fiducialMomentumIndexX,  "momentum X");
-            int momentumYLocY = findIndex(testFluxArray, momentumFluxY, fiducialMomentumIndexY,  "momentum Y");
-            int momentumZLocZ = findIndex(testFluxArray, momentumFluxZ, fiducialMomentumIndexZ,  "momentum Z");
-            int energyLoc     = findIndex(testFluxArray, energyFlux,    fiducialEnergyIndex,    "energy");
-            int magneticYLoc  = findIndex(testFluxArray, magneticFluxY, fiducialMagneticYIndex, "magnetic Y");
-            int magneticZLoc  = findIndex(testFluxArray, magneticFluxZ, fiducialMagneticZIndex, "magnetic Z");
-
-            for (size_t i = 0; i < testFluxArray.size(); i++)
-            {
-                // Skip the already checked indices
-                if ((i != densityLoc)    and
-                    (i != momentumXLocX) and
-                    (i != momentumYLocY) and
-                    (i != momentumZLocZ) and
-                    (i != energyLoc)     and
-                    (i != magneticYLoc)  and
-                    (i != magneticZLoc))
-                {
-                    EXPECT_EQ(dummyValue, testFluxArray.at(i))
-                        << "Unexpected value at index that _returnFluxes shouldn't be touching" << std::endl
-                        << "Index     = " << i         << std::endl
-                        << "Direction = " << direction << std::endl;
-                }
-            }
-        }
-    }
-    // =========================================================================
-#endif  // CUDA & HLLD
\ No newline at end of file
+#include "../utils/testing_utilities.h"
+
+#ifdef MHD
+// =========================================================================
+// Integration tests for the entire HLLD solver. Unit tests are below
+// =========================================================================
+
+// =========================================================================
+/*!
+* \brief Test fixture for simple testing of the HLLD Riemann Solver.
+Effectively takes the left state, right state, fiducial fluxes, and
+custom user output then performs all the required running and testing
+*
+*/
+// NOLINTNEXTLINE(readability-identifier-naming)
+class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test
+{
+ protected:
+  // =====================================================================
+  /*!
+   * \brief Compute and return the HLLD fluxes
+   *
+   * \param[in] leftState The state on the left side in conserved
+   * variables. In order the elements are: density, x-momentum,
+   * y-momentum, z-momentum, energy, passive scalars, x-magnetic field,
+   * y-magnetic field, z-magnetic field.
+   * \param[in] rightState The state on the right side in conserved
+   * variables. In order the elements are: density, x-momentum,
+   * y-momentum, z-momentum, energy, passive scalars, x-magnetic field,
+   * y-magnetic field, z-magnetic field.
+   * \param[in] gamma The adiabatic index
+   * \param[in] direction Which plane the interface is. 0 = plane normal to
+   * X, 1 = plane normal to Y, 2 = plane normal to Z. Defaults to 0.
+   * \return std::vector<double>
+   */
+  std::vector<Real> Compute_Fluxes(std::vector<Real> stateLeft, std::vector<Real> stateRight, Real const &gamma,
+                                   int const &direction = 0)
+  {
+    // Rearrange X, Y, and Z values for the chosen direction
+    std::rotate(stateLeft.begin() + 1, stateLeft.begin() + 4 - direction, stateLeft.begin() + 4);
+    std::rotate(stateRight.begin() + 1, stateRight.begin() + 4 - direction, stateRight.begin() + 4);
+
+    // Create new vectors that store the values in the way that the HLLD
+    // solver expects
+    EXPECT_DOUBLE_EQ(stateLeft.at(grid_enum::magnetic_x), stateRight.at(grid_enum::magnetic_x))
+        << "The left and right magnetic fields are not equal";
+    std::vector<Real> const magneticX{stateLeft.at(grid_enum::magnetic_x)};
+    stateLeft.erase(stateLeft.begin() + grid_enum::magnetic_x);
+    stateRight.erase(stateRight.begin() + grid_enum::magnetic_x);
+
+    // Simulation Paramters
+    int const nx      = 1;  // Number of cells in the x-direction
+    int const ny      = 1;  // Number of cells in the y-direction
+    int const nz      = 1;  // Number of cells in the z-direction
+    int const n_cells = nx * ny * nz;
+    int nFields       = 8;  // Total number of conserved fields
+  #ifdef SCALAR
+    nFields += NSCALARS;
+  #endif  // SCALAR
+  #ifdef DE
+    nFields++;
+  #endif  // DE
+
+    // Launch Parameters
+    dim3 const dimGrid(1, 1, 1);   // How many blocks in the grid
+    dim3 const dimBlock(1, 1, 1);  // How many threads per block
+
+    // Create the std::vector to store the fluxes and declare the device
+    // pointers
+    std::vector<Real> testFlux(nFields - 1, 0);
+    Real *devConservedLeft;
+    Real *devConservedRight;
+    Real *devConservedMagXFace;
+    Real *devTestFlux;
+
+    // Allocate device arrays and copy data
+    GPU_Error_Check(cudaMalloc(&devConservedLeft, stateLeft.size() * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc(&devConservedRight, stateRight.size() * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc(&devConservedMagXFace, magneticX.size() * sizeof(Real)));
+    GPU_Error_Check(cudaMalloc(&devTestFlux, testFlux.size() * sizeof(Real)));
+
+    GPU_Error_Check(
+        cudaMemcpy(devConservedLeft, stateLeft.data(), stateLeft.size() * sizeof(Real), cudaMemcpyHostToDevice));
+    GPU_Error_Check(
+        cudaMemcpy(devConservedRight, stateRight.data(), stateRight.size() * sizeof(Real), cudaMemcpyHostToDevice));
+    GPU_Error_Check(
+        cudaMemcpy(devConservedMagXFace, magneticX.data(), magneticX.size() * sizeof(Real), cudaMemcpyHostToDevice));
+
+    // Run kernel
+    hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dimGrid, dimBlock, 0, 0,
+                       devConservedLeft,      // the "left" interface
+                       devConservedRight,     // the "right" interface
+                       devConservedMagXFace,  // the magnetic field at the interface
+                       devTestFlux, n_cells, gamma, direction, nFields);
+
+    GPU_Error_Check();
+    GPU_Error_Check(cudaMemcpy(testFlux.data(), devTestFlux, testFlux.size() * sizeof(Real), cudaMemcpyDeviceToHost));
+
+    // Make sure to sync with the device so we have the results
+    cudaDeviceSynchronize();
+    GPU_Error_Check();
+
+    // Free device arrays
+    cudaFree(devConservedLeft);
+    cudaFree(devConservedRight);
+    cudaFree(devConservedMagXFace);
+    cudaFree(devTestFlux);
+
+    // The HLLD solver only writes the the first two "slots" for
+    // magnetic flux so let's rearrange to make sure we have all the
+    // magnetic fluxes in the right spots
+    testFlux.insert(testFlux.begin() + grid_enum::magnetic_x, 0.0);
+    std::rotate(testFlux.begin() + 1, testFlux.begin() + 1 + direction,
+                testFlux.begin() + 4);  // Rotate momentum
+
+    return testFlux;
+  }
+  // =====================================================================
+
+  // =====================================================================
+  /*!
+   * \brief Check if the fluxes are correct
+   *
+   * \param[in] fiducialFlux The fiducial flux in conserved variables. In
+   * order the elements are: density, x-momentum,
+   * y-momentum, z-momentum, energy, passive scalars, x-magnetic field,
+   * y-magnetic field, z-magnetic field.
+   * \param[in] scalarFlux The fiducial flux in the passive scalars
+   * \param[in] thermalEnergyFlux The fiducial flux in the dual energy
+   * thermal energy
+   * \param[in] testFlux The test flux in conserved variables. In order the
+   * elements are: density, x-momentum,
+   * y-momentum, z-momentum, energy, passive scalars, x-magnetic field,
+   * y-magnetic field, z-magnetic field.
+   * \param[in] customOutput Any custom output the user would like to
+   * print. It will print after the default GTest output but before the
+   * values that failed are printed
+   * \param[in] direction Which plane the interface is. 0 = plane normal to
+   * X, 1 = plane normal to Y, 2 = plane normal to Z. Defaults to 0.
+   */
+  void Check_Results(std::vector<Real> fiducialFlux, std::vector<Real> const &scalarFlux, Real thermalEnergyFlux,
+                     std::vector<Real> const &testFlux, std::string const &customOutput = "", int const &direction = 0)
+  {
+    // Field names
+    std::vector<std::string> fieldNames{"Densities", "X Momentum",       "Y Momentum",       "Z Momentum",
+                                        "Energies",  "X Magnetic Field", "Y Magnetic Field", "Z Magnetic Field"};
+  #ifdef DE
+    fieldNames.push_back("Thermal energy (dual energy)");
+    fiducialFlux.push_back(thermalEnergyFlux);
+  #endif  // DE
+  #ifdef SCALAR
+    std::vector<std::string> scalarNames{"Scalar 1", "Scalar 2", "Scalar 3"};
+    fieldNames.insert(fieldNames.begin() + grid_enum::magnetic_start, scalarNames.begin(),
+                      scalarNames.begin() + grid_enum::nscalars);
+
+    fiducialFlux.insert(fiducialFlux.begin() + grid_enum::magnetic_start, scalarFlux.begin(),
+                        scalarFlux.begin() + grid_enum::nscalars);
+  #endif  // SCALAR
+
+    ASSERT_TRUE((fiducialFlux.size() == testFlux.size()) and (fiducialFlux.size() == fieldNames.size()))
+        << "The fiducial flux, test flux, and field name vectors are not all "
+           "the same length"
+        << std::endl
+        << "fiducialFlux.size() = " << fiducialFlux.size() << std::endl
+        << "testFlux.size() = " << testFlux.size() << std::endl
+        << "fieldNames.size() = " << fieldNames.size() << std::endl;
+
+    // Check for equality
+    for (size_t i = 0; i < fieldNames.size(); i++) {
+      // Check for equality and if not equal return difference
+      double absoluteDiff;
+      int64_t ulpsDiff;
+
+      // This error is consistent with the FP error in rearanging the flux
+      // computations in the Athena solver
+      double const fixedEpsilon = 2.7E-15;
+      int64_t const ulpsEpsilon = 7;
+
+      bool areEqual = testing_utilities::nearlyEqualDbl(fiducialFlux[i], testFlux[i], absoluteDiff, ulpsDiff,
+                                                        fixedEpsilon, ulpsEpsilon);
+      EXPECT_TRUE(areEqual) << std::endl
+                            << customOutput << std::endl
+                            << "There's a difference in " << fieldNames[i] << " Flux" << std::endl
+                            << "The direction is:       " << direction << " (0=X, 1=Y, 2=Z)" << std::endl
+                            << "The fiducial value is:       " << fiducialFlux[i] << std::endl
+                            << "The test value is:           " << testFlux[i] << std::endl
+                            << "The absolute difference is:  " << absoluteDiff << std::endl
+                            << "The ULP difference is:       " << ulpsDiff << std::endl;
+    }
+  }
+  // =====================================================================
+
+  // =====================================================================
+  /*!
+   * \brief Convert a vector of quantities in primitive variables  to
+   * conserved variables
+   *
+   * \param[in] input The state in primitive variables. In order the
+   * elements are: density, x-momentum,
+   * y-momentum, z-momentum, energy, passive scalars, x-magnetic field,
+   * y-magnetic field, z-magnetic field.
+   * \return std::vector<Real> The state in conserved variables. In order
+   * the elements are: density, x-momentum,
+   * y-momentum, z-momentum, energy, passive scalars, x-magnetic field,
+   * y-magnetic field, z-magnetic field.
+   */
+  std::vector<Real> Primitive_2_Conserved(std::vector<Real> const &input, double const &gamma,
+                                          std::vector<Real> const &primitiveScalars)
+  {
+    std::vector<Real> output(input.size());
+    output.at(0) = input.at(0);                // Density
+    output.at(1) = input.at(1) * input.at(0);  // X Velocity to momentum
+    output.at(2) = input.at(2) * input.at(0);  // Y Velocity to momentum
+    output.at(3) = input.at(3) * input.at(0);  // Z Velocity to momentum
+    output.at(4) =
+        hydro_utilities::Calc_Energy_Primitive(input.at(4), input.at(0), input.at(1), input.at(2), input.at(3), gamma,
+                                               input.at(5), input.at(6), input.at(7));  // Pressure to Energy
+    output.at(5) = input.at(5);                                                         // X Magnetic Field
+    output.at(6) = input.at(6);                                                         // Y Magnetic Field
+    output.at(7) = input.at(7);                                                         // Z Magnetic Field
+
+  #ifdef SCALAR
+    std::vector<Real> conservedScalar(primitiveScalars.size());
+    std::transform(primitiveScalars.begin(), primitiveScalars.end(), conservedScalar.begin(),
+                   [&](Real const &c) { return c * output.at(0); });
+    output.insert(output.begin() + grid_enum::magnetic_start, conservedScalar.begin(),
+                  conservedScalar.begin() + grid_enum::nscalars);
+  #endif  // SCALAR
+  #ifdef DE
+    output.push_back(mhd::utils::computeThermalEnergy(
+        output.at(4), output.at(0), output.at(1), output.at(2), output.at(3), output.at(grid_enum::magnetic_x),
+        output.at(grid_enum::magnetic_y), output.at(grid_enum::magnetic_z), gamma));
+  #endif  // DE
+    return output;
+  }
+  // =====================================================================
+
+  // =====================================================================
+  /*!
+   * \brief On test start make sure that the number of NSCALARS is allowed
+   *
+   */
+  void SetUp()
+  {
+  #ifdef SCALAR
+    ASSERT_LE(NSCALARS, 3) << "Only up to 3 passive scalars are currently "
+                              "supported in HLLD tests. NSCALARS = "
+                           << NSCALARS;
+    ASSERT_GE(NSCALARS, 1) << "There must be at least 1 passive scalar to test "
+                              "with passive scalars. NSCALARS = "
+                           << NSCALARS;
+  #endif  // SCALAR
+  }
+  // =====================================================================
+ private:
+};
+// =========================================================================
+
+// =========================================================================
+/*!
+ * \brief Test the HLLD Riemann Solver using various states and waves from
+ * the Brio & Wu Shock tube
+ *
+ */
+TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectOutput)
+{
+  // Constant Values
+  Real const gamma = 2.;
+  Real const Vz    = 0.0;
+  Real const Bx    = 0.75;
+  Real const Bz    = 0.0;
+  std::vector<Real> const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875};
+
+  // States
+  std::vector<Real> const  // | Density | X-Velocity | Y-Velocity | Z-Velocity |
+                           // Pressure | X-Magnetic Field | Y-Magnetic Field |
+                           // Z-Magnetic Field | Adiabatic Index | Passive
+                           // Scalars |
+      leftICs               = Primitive_2_Conserved({1.0, 0.0, 0.0, Vz, 1.0, Bx, 1.0, Bz}, gamma, primitiveScalar),
+      leftFastRareLeftSide  = Primitive_2_Conserved({0.978576, 0.038603, -0.011074, Vz, 0.957621, Bx, 0.970288, Bz},
+                                                    gamma, primitiveScalar),
+      leftFastRareRightSide = Primitive_2_Conserved({0.671655, 0.647082, -0.238291, Vz, 0.451115, Bx, 0.578240, Bz},
+                                                    gamma, primitiveScalar),
+      compoundLeftSide  = Primitive_2_Conserved({0.814306, 0.506792, -0.911794, Vz, 0.706578, Bx, -0.108819, Bz}, gamma,
+                                                primitiveScalar),
+      compoundPeak      = Primitive_2_Conserved({0.765841, 0.523701, -1.383720, Vz, 0.624742, Bx, -0.400787, Bz}, gamma,
+                                                primitiveScalar),
+      compoundRightSide = Primitive_2_Conserved({0.695211, 0.601089, -1.583720, Vz, 0.515237, Bx, -0.537027, Bz}, gamma,
+                                                primitiveScalar),
+      contactLeftSide   = Primitive_2_Conserved({0.680453, 0.598922, -1.584490, Vz, 0.515856, Bx, -0.533616, Bz}, gamma,
+                                                primitiveScalar),
+      contactRightSide  = Primitive_2_Conserved({0.231160, 0.599261, -1.584820, Vz, 0.516212, Bx, -0.533327, Bz}, gamma,
+                                                primitiveScalar),
+      slowShockLeftSide = Primitive_2_Conserved({0.153125, 0.086170, -0.683303, Vz, 0.191168, Bx, -0.850815, Bz}, gamma,
+                                                primitiveScalar),
+      slowShockRightSide     = Primitive_2_Conserved({0.117046, -0.238196, -0.165561, Vz, 0.087684, Bx, -0.903407, Bz},
+                                                     gamma, primitiveScalar),
+      rightFastRareLeftSide  = Primitive_2_Conserved({0.117358, -0.228756, -0.158845, Vz, 0.088148, Bx, -0.908335, Bz},
+                                                     gamma, primitiveScalar),
+      rightFastRareRightSide = Primitive_2_Conserved({0.124894, -0.003132, -0.002074, Vz, 0.099830, Bx, -0.999018, Bz},
+                                                     gamma, primitiveScalar),
+      rightICs               = Primitive_2_Conserved({0.128, 0.0, 0.0, Vz, 0.1, Bx, -1.0, Bz}, gamma, primitiveScalar);
+
+  for (size_t direction = 0; direction < 3; direction++) {
+    // Initial Condition Checks
+    {
+      std::string const outputString{
+          "Left State:  Left Brio & Wu state\n"
+          "Right State: Left Brio & Wu state\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0, 1.21875, -0.75, 0, 0, 0.0, 0, 0};
+      std::vector<Real> const scalarFlux{0, 0, 0};
+      Real thermalEnergyFlux             = 0.0;
+      std::vector<Real> const testFluxes = Compute_Fluxes(leftICs, leftICs, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right Brio & Wu state\n"
+          "Right State: Right Brio & Wu state\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0, 0.31874999999999998, 0.75, 0, 0, 0.0, 0, 0};
+      std::vector<Real> const scalarFlux{0, 0, 0};
+      Real thermalEnergyFlux             = 0.0;
+      std::vector<Real> const testFluxes = Compute_Fluxes(rightICs, rightICs, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Left Brio & Wu state\n"
+          "Right State: Right Brio & Wu state\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.20673357746080057,  0.4661897584603672,
+                                           0.061170028480309613, 0,
+                                           0.064707291981509041, 0.0,
+                                           1.0074980455427278,   0};
+      std::vector<Real> const scalarFlux{0.22885355953447648, 0.46073027567244362, 0.6854281091039145};
+      Real thermalEnergyFlux             = 0.20673357746080046;
+      std::vector<Real> const testFluxes = Compute_Fluxes(leftICs, rightICs, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Left Brio & Wu state with negative Bx\n"
+          "Right State: Right Brio & Wu state with negative Bx\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.20673357746080057,   0.4661897584603672,
+                                           -0.061170028480309613, 0,
+                                           0.064707291981509041,  0.0,
+                                           1.0074980455427278,    0};
+      std::vector<Real> const scalarFlux{0.22885355953447648, 0.46073027567244362, 0.6854281091039145};
+      Real thermalEnergyFlux = 0.20673357746080046;
+
+      std::vector<Real> leftICsNegBx = leftICs, rightICsNegBx = rightICs;
+      leftICsNegBx[5]  = -leftICsNegBx[5];
+      rightICsNegBx[5] = -rightICsNegBx[5];
+
+      std::vector<Real> const testFluxes = Compute_Fluxes(leftICsNegBx, rightICsNegBx, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right Brio & Wu state\n"
+          "Right State: Left Brio & Wu state\n"
+          "HLLD State: Right Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{-0.20673357746080057,  0.4661897584603672,
+                                           0.061170028480309613,  0,
+                                           -0.064707291981509041, 0.0,
+                                           -1.0074980455427278,   0};
+      std::vector<Real> const scalarFlux{-0.22885355953447648, -0.46073027567244362, -0.6854281091039145};
+      Real thermalEnergyFlux             = -0.20673357746080046;
+      std::vector<Real> const testFluxes = Compute_Fluxes(rightICs, leftICs, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+
+    // Cross wave checks
+    {
+      std::string const outputString{
+          "Left State:  Left of left fast rarefaction\n"
+          "Right State: Right of left fast rarefaction\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.4253304970883941,   0.47729308161522394,
+                                           -0.55321646324583107, 0,
+                                           0.92496835095531071,  0.0,
+                                           0.53128887284876058,  0};
+      std::vector<Real> const scalarFlux{0.47083980954039228, 0.94789941519098619, 1.4101892974729979};
+      Real thermalEnergyFlux = 0.41622256825457099;
+      std::vector<Real> const testFluxes =
+          Compute_Fluxes(leftFastRareLeftSide, leftFastRareRightSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right of left fast rarefaction\n"
+          "Right State: Left of left fast rarefaction\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.070492123816403796, 1.2489600267034342,
+                                           -0.71031457071286608, 0,
+                                           0.21008080091470105,  0.0,
+                                           0.058615131833681167, 0};
+      std::vector<Real> const scalarFlux{0.078034606921016325, 0.15710005136841393, 0.23371763662029341};
+      Real thermalEnergyFlux = 0.047345816580591255;
+      std::vector<Real> const testFluxes =
+          Compute_Fluxes(leftFastRareRightSide, leftFastRareLeftSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Left of compound wave\n"
+          "Right State: Right of compound wave\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.4470171023231666,   0.60747660800918468,
+                                           -0.20506357956052623, 0,
+                                           0.72655525704800772,  0.0,
+                                           0.76278089951123285,  0};
+      std::vector<Real> const scalarFlux{0.4948468279606959, 0.99623058485843297, 1.482091544807598};
+      Real thermalEnergyFlux             = 0.38787931087981475;
+      std::vector<Real> const testFluxes = Compute_Fluxes(compoundLeftSide, compoundRightSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right of compound wave\n"
+          "Right State: Left of compound wave\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.38496850292724116, 0.66092864409611585,
+                                           -0.3473204105316457, 0,
+                                           0.89888639514227009, 0.0,
+                                           0.71658566275120927, 0};
+      std::vector<Real> const scalarFlux{0.42615918171426637, 0.85794792823389721, 1.2763685331959034};
+      Real thermalEnergyFlux             = 0.28530908823756074;
+      std::vector<Real> const testFluxes = Compute_Fluxes(compoundRightSide, compoundLeftSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Left of Compound Wave\n"
+          "Right State: Peak of Compound Wave\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.41864266180405574, 0.63505764056357727,
+                                           -0.1991008813536404, 0,
+                                           0.73707474818824525, 0.0,
+                                           0.74058225030218761, 0};
+      std::vector<Real> const scalarFlux{0.46343639240225803, 0.93299478173931882, 1.388015684704111};
+      Real thermalEnergyFlux             = 0.36325864563467081;
+      std::vector<Real> const testFluxes = Compute_Fluxes(compoundLeftSide, compoundPeak, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Peak of Compound Wave\n"
+          "Right State: Left of Compound Wave\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.39520761138156862,  0.6390998385557225,
+                                           -0.35132701297727598, 0,
+                                           0.89945171879176522,  0.0,
+                                           0.71026545717401468,  0};
+      std::vector<Real> const scalarFlux{0.43749384947851333, 0.88076699477714815, 1.3103164425435772};
+      Real thermalEnergyFlux             = 0.32239432669410983;
+      std::vector<Real> const testFluxes = Compute_Fluxes(compoundPeak, compoundLeftSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Peak of Compound Wave\n"
+          "Right State: Right of Compound Wave\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.4285899590904928,   0.6079309920345296,
+                                           -0.26055320217638239, 0,
+                                           0.75090757444649436,  0.0,
+                                           0.85591904930227747,  0};
+      std::vector<Real> const scalarFlux{0.47444802592454061, 0.95516351251477749, 1.4209960899845735};
+      Real thermalEnergyFlux             = 0.34962629086469987;
+      std::vector<Real> const testFluxes = Compute_Fluxes(compoundPeak, compoundRightSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right of Compound Wave\n"
+          "Right State: Peak of Compound Wave\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.39102247793946454,  0.65467021266207581,
+                                           -0.25227691377588229, 0,
+                                           0.76271525822813691,  0.0,
+                                           0.83594460438033491,  0};
+      std::vector<Real> const scalarFlux{0.43286091709705776, 0.8714399289555731, 1.2964405732397004};
+      Real thermalEnergyFlux             = 0.28979582956267347;
+      std::vector<Real> const testFluxes = Compute_Fluxes(compoundRightSide, compoundPeak, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Left of contact discontinuity\n"
+          "Right State: Right of contact discontinuity\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.40753761783585118, 0.62106392255463172,
+                                           -0.2455554035355339, 0,
+                                           0.73906344777217226, 0.0,
+                                           0.8687394222350926,  0};
+      std::vector<Real> const scalarFlux{0.45114313616335622, 0.90824587528847567, 1.3511967538747176};
+      Real thermalEnergyFlux             = 0.30895701155896288;
+      std::vector<Real> const testFluxes = Compute_Fluxes(contactLeftSide, contactRightSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right of contact discontinuity\n"
+          "Right State: Left of contact discontinuity\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.13849588572126192, 0.46025037934770729,
+                                           0.18052412687974539, 0,
+                                           0.35385590617992224, 0.0,
+                                           0.86909622543144227, 0};
+      std::vector<Real> const scalarFlux{0.15331460335320088, 0.30865449334158279, 0.45918507401922254};
+      Real thermalEnergyFlux             = 0.30928031735570188;
+      std::vector<Real> const testFluxes = Compute_Fluxes(contactRightSide, contactLeftSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Slow shock left side\n"
+          "Right State: Slow shock right side\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{3.5274134848883865e-05, 0.32304849716274459,
+                                           0.60579784881286636,    0,
+                                           -0.32813070621836449,   0.0,
+                                           0.40636483121437972,    0};
+      std::vector<Real> const scalarFlux{3.9048380136491711e-05, 7.8612589559210735e-05, 0.00011695189454326261};
+      Real thermalEnergyFlux             = 4.4037784886918126e-05;
+      std::vector<Real> const testFluxes = Compute_Fluxes(slowShockLeftSide, slowShockRightSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Slow shock right side\n"
+          "Right State: Slow shock left side\n"
+          "HLLD State: Right Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{-0.016514307834939734, 0.16452009375678914,
+                                           0.71622171077118635,   0,
+                                           -0.37262428139914472,  0.0,
+                                           0.37204015363322052,   0};
+      std::vector<Real> const scalarFlux{-0.018281297976332211, -0.036804091985367396, -0.054753421923485097};
+      Real thermalEnergyFlux             = -0.020617189878790236;
+      std::vector<Real> const testFluxes = Compute_Fluxes(slowShockRightSide, slowShockLeftSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right fast rarefaction left side\n"
+          "Right State: Right fast rarefaction right side\n"
+          "HLLD State: Right Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{-0.026222824218991747, 0.22254903570732654,
+                                           0.68544334213642255,   0,
+                                           -0.33339172106895454,  0.0,
+                                           0.32319665359522443,   0};
+      std::vector<Real> const scalarFlux{-0.029028601629558917, -0.058440671223894146, -0.086942145734385745};
+      Real thermalEnergyFlux = -0.020960370728633469;
+      std::vector<Real> const testFluxes =
+          Compute_Fluxes(rightFastRareLeftSide, rightFastRareRightSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right fast rarefaction right side\n"
+          "Right State: Right fast rarefaction left side\n"
+          "HLLD State: Right Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{-0.001088867226159973,  0.32035322820305906,
+                                           0.74922357263343131,    0,
+                                           -0.0099746892805345766, 0.0,
+                                           0.0082135595470345102,  0};
+      std::vector<Real> const scalarFlux{-0.0012053733294214947, -0.0024266696462237609, -0.0036101547366371614};
+      Real thermalEnergyFlux = -0.00081785194236053073;
+      std::vector<Real> const testFluxes =
+          Compute_Fluxes(rightFastRareRightSide, rightFastRareLeftSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+  }
+}
+// =========================================================================
+
+// =========================================================================
+/*!
+ * \brief Test the HLLD Riemann Solver using various states and waves from
+ * the Dai & Woodward Shock tube
+ *
+ */
+TEST_F(tMHDCalculateHLLDFluxesCUDA, DaiAndWoodwardShockTubeCorrectInputExpectCorrectOutput)
+{
+  // Constant Values
+  Real const gamma = 5. / 3.;
+  Real const coef  = 1. / (std::sqrt(4. * M_PI));
+  Real const Bx    = 4. * coef;
+  std::vector<Real> const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875};
+
+  // States
+  std::vector<Real> const  // | Density | X-Velocity | Y-Velocity | Z-Velocity |
+                           // Pressure | X-Magnetic Field | Y-Magnetic Field |
+                           // Z-Magnetic Field | Adiabatic Index | Passive Scalars |
+      leftICs = Primitive_2_Conserved({1.08, 0.0, 0.0, 0.0, 1.0, Bx, 3.6 * coef, 2 * coef}, gamma, primitiveScalar),
+      leftFastShockLeftSide = Primitive_2_Conserved(
+          {1.09406, 1.176560, 0.021003, 0.506113, 0.970815, 1.12838, 1.105355, 0.614087}, gamma, primitiveScalar),
+      leftFastShockRightSide = Primitive_2_Conserved(
+          {1.40577, 0.693255, 0.210562, 0.611423, 1.494290, 1.12838, 1.457700, 0.809831}, gamma, primitiveScalar),
+      leftRotationLeftSide = Primitive_2_Conserved(
+          {1.40086, 0.687774, 0.215124, 0.609161, 1.485660, 1.12838, 1.458735, 0.789960}, gamma, primitiveScalar),
+      leftRotationRightSide = Primitive_2_Conserved(
+          {1.40119, 0.687504, 0.330268, 0.334140, 1.486570, 1.12838, 1.588975, 0.475782}, gamma, primitiveScalar),
+      leftSlowShockLeftSide = Primitive_2_Conserved(
+          {1.40519, 0.685492, 0.326265, 0.333664, 1.493710, 1.12838, 1.575785, 0.472390}, gamma, primitiveScalar),
+      leftSlowShockRightSide = Primitive_2_Conserved(
+          {1.66488, 0.578545, 0.050746, 0.250260, 1.984720, 1.12838, 1.344490, 0.402407}, gamma, primitiveScalar),
+      contactLeftSide = Primitive_2_Conserved(
+          {1.65220, 0.578296, 0.049683, 0.249962, 1.981250, 1.12838, 1.346155, 0.402868}, gamma, primitiveScalar),
+      contactRightSide = Primitive_2_Conserved(
+          {1.49279, 0.578276, 0.049650, 0.249924, 1.981160, 1.12838, 1.346180, 0.402897}, gamma, primitiveScalar),
+      rightSlowShockLeftSide = Primitive_2_Conserved(
+          {1.48581, 0.573195, 0.035338, 0.245592, 1.956320, 1.12838, 1.370395, 0.410220}, gamma, primitiveScalar),
+      rightSlowShockRightSide = Primitive_2_Conserved(
+          {1.23813, 0.450361, -0.275532, 0.151746, 1.439000, 1.12838, 1.609775, 0.482762}, gamma, primitiveScalar),
+      rightRotationLeftSide = Primitive_2_Conserved(
+          {1.23762, 0.450102, -0.274410, 0.145585, 1.437950, 1.12838, 1.606945, 0.493879}, gamma, primitiveScalar),
+      rightRotationRightSide = Primitive_2_Conserved(
+          {1.23747, 0.449993, -0.180766, -0.090238, 1.437350, 1.12838, 1.503855, 0.752090}, gamma, primitiveScalar),
+      rightFastShockLeftSide = Primitive_2_Conserved(
+          {1.22305, 0.424403, -0.171402, -0.085701, 1.409660, 1.12838, 1.447730, 0.723864}, gamma, primitiveScalar),
+      rightFastShockRightSide = Primitive_2_Conserved(
+          {1.00006, 0.000121, -0.000057, -0.000028, 1.000100, 1.12838, 1.128435, 0.564217}, gamma, primitiveScalar),
+      rightICs = Primitive_2_Conserved({1.0, 0.0, 0.0, 1.0, 0.2, Bx, 4 * coef, 2 * coef}, gamma, primitiveScalar);
+
+  for (size_t direction = 0; direction < 3; direction++) {
+    // Initial Condition Checks
+    {
+      std::string const outputString{
+          "Left State:  Left Dai & Woodward state\n"
+          "Right State: Left Dai & Woodward state\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0, 1.0381971863420549,     -1.1459155902616465, -0.63661977236758127, 0, 0.0,
+                                           0, -1.1102230246251565e-16};
+      std::vector<Real> const scalarFlux{0, 0, 0};
+      Real thermalEnergyFlux             = 0.0;
+      std::vector<Real> const testFluxes = Compute_Fluxes(leftICs, leftICs, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right Dai & Woodward state\n"
+          "Right State: Right Dai & Woodward state\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{
+          0,   0.35915494309189522,    -1.2732395447351625, -0.63661977236758127, -0.63661977236758172,
+          0.0, 2.2204460492503131e-16, -1.1283791670955123};
+      std::vector<Real> const scalarFlux{0, 0, 0};
+      Real thermalEnergyFlux             = 0.0;
+      std::vector<Real> const testFluxes = Compute_Fluxes(rightICs, rightICs, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Left Dai & Woodward state\n"
+          "Right State: Right Dai & Woodward state\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.17354924587196074,  0.71614983677687327,  -1.1940929411768009,
+                                           -1.1194725181819352,  -0.11432087006939984, 0.0,
+                                           0.056156000248263505, -0.42800560867873094};
+      std::vector<Real> const scalarFlux{0.19211858644420357, 0.38677506032368902, 0.57540498691841158};
+      Real thermalEnergyFlux             = 0.24104061926661174;
+      std::vector<Real> const testFluxes = Compute_Fluxes(leftICs, rightICs, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right Dai & Woodward state\n"
+          "Right State: Left Dai & Woodward state\n"
+          "HLLD State: Right Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{-0.17354924587196074,  0.71614983677687327,  -1.1940929411768009,
+                                           -0.14549552299758384,  -0.47242308031148195, 0.0,
+                                           -0.056156000248263505, -0.55262526758377528};
+      std::vector<Real> const scalarFlux{-0.19211858644420357, -0.38677506032368902, -0.57540498691841158};
+      Real thermalEnergyFlux             = -0.24104061926661174;
+      std::vector<Real> const testFluxes = Compute_Fluxes(rightICs, leftICs, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+
+    // Cross wave checks
+    {
+      std::string const outputString{
+          "Left State:  Left of left fast shock\n"
+          "Right State: Right of left fast shock\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.96813688187727132,  3.0871217875403394,   -1.4687093290523414,
+                                           -0.33726008721080036, 4.2986213406773457,   0.0,
+                                           0.84684181393860269,  -0.087452560407274671};
+      std::vector<Real> const scalarFlux{1.0717251365527865, 2.157607767226648, 3.2098715673061045};
+      Real thermalEnergyFlux = 1.2886155333980993;
+      std::vector<Real> const testFluxes =
+          Compute_Fluxes(leftFastShockLeftSide, leftFastShockRightSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right of left fast shock\n"
+          "Right State: Left of left fast shock\n"
+          "HLLD State: Left Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{1.3053938862274184,    2.4685129176021858, -1.181892850065283,
+                                           -0.011160487372167127, 5.1797404608257249, 0.0,
+                                           1.1889903073770265,    0.10262704114294516};
+      std::vector<Real> const scalarFlux{1.4450678072086958, 2.9092249669830292, 4.3280519500627666};
+      Real thermalEnergyFlux = 2.081389946702628;
+      std::vector<Real> const testFluxes =
+          Compute_Fluxes(leftFastShockRightSide, leftFastShockLeftSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Left of left rotation/Alfven wave\n"
+          "Right State: Right of left rotation/Alfven wave\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.96326128304298586,  2.8879592118317445,   -1.4808188010794987,
+                                           -0.20403672861184916, 4.014027751838869,    0.0,
+                                           0.7248753989305099,   -0.059178137562467162};
+      std::vector<Real> const scalarFlux{1.0663278606879119, 2.1467419174572049, 3.1937064501984724};
+      Real thermalEnergyFlux = 1.5323573637968553;
+      std::vector<Real> const testFluxes =
+          Compute_Fluxes(leftRotationLeftSide, leftRotationRightSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right of left rotation/Alfven wave\n"
+          "Right State: Left of left rotation/Alfven wave\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.96353754504060063,  2.8875487093397085,  -1.4327309336053695,
+                                           -0.31541343522923493, 3.9739842521208342,  0.0,
+                                           0.75541746728406312,  -0.13479771672887678};
+      std::vector<Real> const scalarFlux{1.0666336820367937, 2.1473576000564334, 3.1946224007710313};
+      Real thermalEnergyFlux = 1.5333744977458499;
+      std::vector<Real> const testFluxes =
+          Compute_Fluxes(leftRotationRightSide, leftRotationLeftSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Left of left slow shock\n"
+          "Right State: Right of left slow shock\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.88716095730727451,  2.9828594399125663,  -1.417062582518549,
+                                           -0.21524331343191233, 3.863474778369334,   0.0,
+                                           0.71242370728996041,  -0.05229712416644372};
+      std::vector<Real> const scalarFlux{0.98208498809672407, 1.9771433235295921, 2.9413947405483505};
+      Real thermalEnergyFlux = 1.4145715457049737;
+      std::vector<Real> const testFluxes =
+          Compute_Fluxes(leftSlowShockLeftSide, leftSlowShockRightSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right of left slow shock\n"
+          "Right State: Left of left slow shock\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{1.042385440439527,    2.7732383399777376,   -1.5199872074603551,
+                                           -0.21019362664841068, 4.1322001036232585,   0.0,
+                                           0.72170937317481543,  -0.049474715634396704};
+      std::vector<Real> const scalarFlux{1.1539181074575644, 2.323079478570472, 3.4560437166206879};
+      Real thermalEnergyFlux = 1.8639570701934713;
+      std::vector<Real> const testFluxes =
+          Compute_Fluxes(leftSlowShockRightSide, leftSlowShockLeftSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Left of contact discontinuity\n"
+          "Right State: Right of contact discontinuity\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.95545795601418737,  2.8843900822429749,   -1.4715039715239722,
+                                           -0.21575736014726318, 4.0078718055059257,   0.0,
+                                           0.72241353110189066,  -0.049073560388753337};
+      std::vector<Real> const scalarFlux{1.0576895969443709, 2.1293512784652289, 3.1678344087247892};
+      Real thermalEnergyFlux             = 1.7186185770667382;
+      std::vector<Real> const testFluxes = Compute_Fluxes(contactLeftSide, contactRightSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right of contact discontinuity\n"
+          "Right State: Left of contact discontinuity\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.86324813554422819,  2.8309913324581251,   -1.4761428591480787,
+                                           -0.23887765947428419, 3.9892942559102793,   0.0,
+                                           0.72244123046603836,  -0.049025527032060034};
+      std::vector<Real> const scalarFlux{0.95561355347926669, 1.9238507665182214, 2.8621114407298114};
+      Real thermalEnergyFlux             = 1.7184928987481187;
+      std::vector<Real> const testFluxes = Compute_Fluxes(contactRightSide, contactLeftSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Left of right slow shock\n"
+          "Right State: Right of right slow shock\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.81125524370350677, 2.901639500435365,    -1.5141545346789429,
+                                           -0.262600896007809,  3.8479660419540087,   0.0,
+                                           0.7218977970017596,  -0.049091614519593846};
+      std::vector<Real> const scalarFlux{0.89805755065482806, 1.8079784457999033, 2.6897282701827465};
+      Real thermalEnergyFlux = 1.6022319728249694;
+      std::vector<Real> const testFluxes =
+          Compute_Fluxes(rightSlowShockLeftSide, rightSlowShockRightSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right of right slow shock\n"
+          "Right State: Left of right slow shock\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.60157947557836688,  2.3888357198399746, -1.9910500022202977,
+                                           -0.45610948442354332, 3.5359430988850069, 0.0,
+                                           1.0670963294022622,   0.05554893654378229};
+      std::vector<Real> const scalarFlux{0.66594699332331575, 1.3406911495770899, 1.994545286188885};
+      Real thermalEnergyFlux = 1.0487665253534804;
+      std::vector<Real> const testFluxes =
+          Compute_Fluxes(rightSlowShockRightSide, rightSlowShockLeftSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Left of right rotation/Alfven wave\n"
+          "Right State: Right of right rotation/Alfven wave\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.55701691287884714,  2.4652223621237814,  -1.9664615862227277,
+                                           -0.47490477894092042, 3.3900659850690529,  0.0,
+                                           1.0325648885587542,   0.059165409025635551};
+      std::vector<Real> const scalarFlux{0.61661634650230224, 1.2413781978573175, 1.8467974773272691};
+      Real thermalEnergyFlux = 0.9707694646266285;
+      std::vector<Real> const testFluxes =
+          Compute_Fluxes(rightRotationLeftSide, rightRotationRightSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right of right rotation/Alfven wave\n"
+          "Right State: Left of right rotation/Alfven wave\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.55689116371132596,  2.4648517303940851, -1.7972202655166787,
+                                           -0.90018282739798461, 3.3401033852664566, 0.0,
+                                           0.88105841856465605,  0.43911718823267476};
+      std::vector<Real> const scalarFlux{0.61647714248450702, 1.2410979509359938, 1.8463805541782863};
+      Real thermalEnergyFlux = 0.9702629326292449;
+      std::vector<Real> const testFluxes =
+          Compute_Fluxes(rightRotationRightSide, rightRotationLeftSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Left of right fast shock\n"
+          "Right State: Right of right fast shock\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.48777637414577313,  2.3709438477809708, -1.7282900552525988,
+                                           -0.86414423547773778, 2.8885015704245069, 0.0,
+                                           0.77133731061645838,  0.38566794697432505};
+      std::vector<Real> const scalarFlux{0.53996724117661621, 1.0870674521621893, 1.6172294888076189};
+      Real thermalEnergyFlux = 0.84330016382608752;
+      std::vector<Real> const testFluxes =
+          Compute_Fluxes(rightFastShockLeftSide, rightFastShockRightSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right of right fast shock\n"
+          "Right State: Left of right fast shock\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.040639426423817904, 1.0717156491947966,  -1.2612066401572222,
+                                           -0.63060225433149875, 0.15803727234007203, 0.0,
+                                           0.042555541396817498, 0.021277678888288909};
+      std::vector<Real> const scalarFlux{0.044987744655527385, 0.090569777630660403, 0.13474059488003065};
+      Real thermalEnergyFlux = 0.060961577855018087;
+      std::vector<Real> const testFluxes =
+          Compute_Fluxes(rightFastShockRightSide, rightFastShockLeftSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+  }
+}
+// =========================================================================
+
+// =========================================================================
+/*!
+ * \brief Test the HLLD Riemann Solver using various states and waves from
+ * the Ryu & Jones 4d Shock tube
+ *
+ */
+TEST_F(tMHDCalculateHLLDFluxesCUDA, RyuAndJones4dShockTubeCorrectInputExpectCorrectOutput)
+{
+  // Constant Values
+  Real const gamma = 5. / 3.;
+  Real const Bx    = 0.7;
+  std::vector<Real> const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875};
+
+  // States
+  std::vector<Real> const  // | Density | X-Velocity | Y-Velocity |  Z-Velocity |
+                           // Pressure | X-Magnetic Field | Y-Magnetic Field |
+                           // Z-Magnetic Field | Adiabatic Index | Passive Scalars |
+      leftICs           = Primitive_2_Conserved({1.0, 0.0, 0.0, 0.0, 1.0, Bx, 0.0, 0.0}, gamma, primitiveScalar),
+      hydroRareLeftSide = Primitive_2_Conserved(
+          {0.990414, 0.012415, 1.458910e-58, 6.294360e-59, 0.984076, Bx, 1.252355e-57, 5.366795e-58}, gamma,
+          primitiveScalar),
+      hydroRareRightSide = Primitive_2_Conserved(
+          {0.939477, 0.079800, 1.557120e-41, 7.505190e-42, 0.901182, Bx, 1.823624e-40, 8.712177e-41}, gamma,
+          primitiveScalar),
+      switchOnSlowShockLeftSide = Primitive_2_Conserved(
+          {0.939863, 0.079142, 1.415730e-02, 7.134030e-03, 0.901820, Bx, 2.519650e-02, 1.290082e-02}, gamma,
+          primitiveScalar),
+      switchOnSlowShockRightSide = Primitive_2_Conserved(
+          {0.651753, 0.322362, 8.070540e-01, 4.425110e-01, 0.490103, Bx, 6.598380e-01, 3.618000e-01}, gamma,
+          primitiveScalar),
+      contactLeftSide = Primitive_2_Conserved(
+          {0.648553, 0.322525, 8.072970e-01, 4.426950e-01, 0.489951, Bx, 6.599295e-01, 3.618910e-01}, gamma,
+          primitiveScalar),
+      contactRightSide = Primitive_2_Conserved(
+          {0.489933, 0.322518, 8.073090e-01, 4.426960e-01, 0.489980, Bx, 6.599195e-01, 3.618850e-01}, gamma,
+          primitiveScalar),
+      slowShockLeftSide = Primitive_2_Conserved(
+          {0.496478, 0.308418, 8.060830e-01, 4.420150e-01, 0.489823, Bx, 6.686695e-01, 3.666915e-01}, gamma,
+          primitiveScalar),
+      slowShockRightSide = Primitive_2_Conserved(
+          {0.298260, -0.016740, 2.372870e-01, 1.287780e-01, 0.198864, Bx, 8.662095e-01, 4.757390e-01}, gamma,
+          primitiveScalar),
+      rotationLeftSide = Primitive_2_Conserved(
+          {0.298001, -0.017358, 2.364790e-01, 1.278540e-01, 0.198448, Bx, 8.669425e-01, 4.750845e-01}, gamma,
+          primitiveScalar),
+      rotationRightSide = Primitive_2_Conserved(
+          {0.297673, -0.018657, 1.059540e-02, 9.996860e-01, 0.197421, Bx, 9.891580e-01, 1.024949e-04}, gamma,
+          primitiveScalar),
+      fastRareLeftSide = Primitive_2_Conserved(
+          {0.297504, -0.020018, 1.137420e-02, 1.000000e+00, 0.197234, Bx, 9.883860e-01, -4.981931e-17}, gamma,
+          primitiveScalar),
+      fastRareRightSide = Primitive_2_Conserved(
+          {0.299996, -0.000033, 1.855120e-05, 1.000000e+00, 0.199995, Bx, 9.999865e-01, 1.737190e-16}, gamma,
+          primitiveScalar),
+      rightICs = Primitive_2_Conserved({0.3, 0.0, 0.0, 1.0, 0.2, Bx, 1.0, 0.0}, gamma, primitiveScalar);
+
+  for (size_t direction = 0; direction < 3; direction++) {
+    // Initial Condition Checks
+    {
+      std::string const outputString{
+          "Left State:  Left Ryu & Jones 4d state\n"
+          "Right State: Left Ryu & Jones 4d state\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0, 0.75499999999999989, 0, 0, 2.2204460492503131e-16, 0.0, 0, 0};
+      std::vector<Real> const scalarFlux{0, 0, 0};
+      Real thermalEnergyFlux             = 0.0;
+      std::vector<Real> const testFluxes = Compute_Fluxes(leftICs, leftICs, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right Ryu & Jones 4d state\n"
+          "Right State: Right Ryu & Jones 4d state\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{
+          -5.5511151231257827e-17, 0.45500000000000013, -0.69999999999999996, -5.5511151231257827e-17, 0, 0.0, 0,
+          -0.69999999999999996};
+      std::vector<Real> const scalarFlux{-6.1450707278254418e-17, -1.2371317869019906e-16, -1.8404800947169341e-16};
+      Real thermalEnergyFlux             = 0.0;
+      std::vector<Real> const testFluxes = Compute_Fluxes(rightICs, rightICs, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Left Ryu & Jones 4d state\n"
+          "Right State: Right Ryu & Jones 4d state\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.092428729855986602, 0.53311593977445149,  -0.39622049648437296,
+                                           -0.21566989083797167, -0.13287876964320211, 0.0,
+                                           -0.40407579574102892, -0.21994567048141428};
+      std::vector<Real> const scalarFlux{0.10231837561464294, 0.20598837745492582, 0.30644876517012837};
+      Real thermalEnergyFlux             = 0.13864309478397996;
+      std::vector<Real> const testFluxes = Compute_Fluxes(leftICs, rightICs, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right Ryu & Jones 4d state\n"
+          "Right State: Left Ryu & Jones 4d state\n"
+          "HLLD State: Right Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{-0.092428729855986602, 0.53311593977445149, -0.39622049648437296,
+                                           0.21566989083797167,   0.13287876964320211, 0.0,
+                                           0.40407579574102892,   -0.21994567048141428};
+      std::vector<Real> const scalarFlux{-0.10231837561464294, -0.20598837745492582, -0.30644876517012837};
+      Real thermalEnergyFlux             = -0.13864309478397996;
+      std::vector<Real> const testFluxes = Compute_Fluxes(rightICs, leftICs, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+
+    // Cross wave checks
+    {
+      std::string const outputString{
+          "Left State:  Left side of pure hydrodynamic rarefaction\n"
+          "Right State: Right side of pure hydrodynamic rarefaction\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.074035256375659553,    0.66054553664209648,    -6.1597070943493028e-41,
+                                           -2.9447391900433873e-41, 0.1776649658235645,     0.0,
+                                           -6.3466063324344113e-41, -3.0340891384335242e-41};
+      std::vector<Real> const scalarFlux{0.081956845911157775, 0.16499634214430131, 0.24546494288869905};
+      Real thermalEnergyFlux             = 0.11034221894046368;
+      std::vector<Real> const testFluxes = Compute_Fluxes(hydroRareLeftSide, hydroRareRightSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right side of pure hydrodynamic rarefaction\n"
+          "Right State: Left side of pure hydrodynamic rarefaction\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.013336890338886076,    0.74071279157971992,   -6.1745213352160876e-41,
+                                           -2.9474651270630147e-41, 0.033152482405470307,  0.0,
+                                           6.2022392844946449e-41,  2.9606965476795895e-41};
+      std::vector<Real> const scalarFlux{0.014763904657692993, 0.029722840565719184, 0.044218649135708464};
+      Real thermalEnergyFlux             = 0.019189877201961154;
+      std::vector<Real> const testFluxes = Compute_Fluxes(hydroRareRightSide, hydroRareLeftSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Left of switch on slow shock\n"
+          "Right State: Right of switch on slow shock\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.19734622040826083,  0.47855039640569758, -0.3392293209655618,
+                                           -0.18588204716255491, 0.10695446263054809, 0.0,
+                                           -0.3558357543098733,  -0.19525093130352045};
+      std::vector<Real> const scalarFlux{0.21846177846784187, 0.43980943806215089, 0.65430419361309078};
+      Real thermalEnergyFlux = 0.2840373040888583;
+      std::vector<Real> const testFluxes =
+          Compute_Fluxes(switchOnSlowShockLeftSide, switchOnSlowShockRightSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right of switch on slow shock\n"
+          "Right State: Left of switch on slow shock\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.097593254768855386,  0.76483698872352757,    -0.02036438492698419,
+                                           -0.010747481940703562, 0.25327551496496836,    0.0,
+                                           -0.002520109973016129, -0.00088262199017708799};
+      std::vector<Real> const scalarFlux{0.10803549193474633, 0.21749813322875222, 0.32357182079044206};
+      Real thermalEnergyFlux = 0.1100817647375162;
+      std::vector<Real> const testFluxes =
+          Compute_Fluxes(switchOnSlowShockRightSide, switchOnSlowShockLeftSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Left of contact discontinuity\n"
+          "Right State: Right of contact discontinuity\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.2091677440314007,   0.5956612619664029,  -0.29309091669513981,
+                                           -0.16072556008504282, 0.19220050968424285, 0.0,
+                                           -0.35226977371803297, -0.19316940226499904};
+      std::vector<Real> const scalarFlux{0.23154817591476573, 0.46615510432814616, 0.69349862290347741};
+      Real thermalEnergyFlux             = 0.23702444986592192;
+      std::vector<Real> const testFluxes = Compute_Fluxes(contactLeftSide, contactRightSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right of contact discontinuity\n"
+          "Right State: Left of contact discontinuity\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.15801775068597168,  0.57916072367837657, -0.33437339604094024,
+                                           -0.18336617461176744, 0.16789791355547545, 0.0,
+                                           -0.3522739911439669,  -0.19317084712861482};
+      std::vector<Real> const scalarFlux{0.17492525964231936, 0.35216128279157616, 0.52391009427617696};
+      Real thermalEnergyFlux             = 0.23704936434506069;
+      std::vector<Real> const testFluxes = Compute_Fluxes(contactRightSide, contactLeftSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Left of slow shock\n"
+          "Right State: Right of slow shock\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.11744487326715558,  0.66868230621718128,  -0.35832022960458892,
+                                           -0.19650694834641164, 0.057880816021092185, 0.0,
+                                           -0.37198011453582402, -0.20397277844271294};
+      std::vector<Real> const scalarFlux{0.13001118457092631, 0.26173981750473918, 0.38939014356639379};
+      Real thermalEnergyFlux             = 0.1738058891582446;
+      std::vector<Real> const testFluxes = Compute_Fluxes(slowShockLeftSide, slowShockRightSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right of slow shock\n"
+          "Right State: Left of slow shock\n"
+          "HLLD State: Left Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0.038440990187426027, 0.33776683678923869,  -0.62583241538732792,
+                                           -0.3437911783906169,  -0.13471828103488348, 0.0,
+                                           -0.15165427985881363, -0.082233932588833825};
+      std::vector<Real> const scalarFlux{0.042554081172858457, 0.085670301959209896, 0.12745164834795927};
+      Real thermalEnergyFlux             = 0.038445630017261548;
+      std::vector<Real> const testFluxes = Compute_Fluxes(slowShockRightSide, slowShockLeftSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Left of rotation/Alfven wave\n"
+          "Right State: Right of rotation/Alfven wave\n"
+          "HLLD State: Right Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{-0.0052668366104996478, 0.44242247672452317,  -0.60785196341731951,
+                                           -0.33352435102145184,   -0.21197843894720192, 0.0,
+                                           -0.18030635192654354,   -0.098381113757603278};
+      std::vector<Real> const scalarFlux{-0.0058303751166299484, -0.011737769516117116, -0.017462271505355991};
+      Real thermalEnergyFlux             = -0.0052395622905745485;
+      std::vector<Real> const testFluxes = Compute_Fluxes(rotationLeftSide, rotationRightSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right of rotation/Alfven wave\n"
+          "Right State: Left of rotation/Alfven wave\n"
+          "HLLD State: Right Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{-0.005459628948343731,  0.4415038084184626,    -0.69273580053867279,
+                                           -0.0051834737482743809, -0.037389286119015486, 0.0,
+                                           -0.026148289294373184,  -0.69914753968916865};
+      std::vector<Real> const scalarFlux{-0.0060437957583491572, -0.012167430087241717, -0.018101477236719343};
+      Real thermalEnergyFlux             = -0.0054536013916442853;
+      std::vector<Real> const testFluxes = Compute_Fluxes(rotationRightSide, rotationLeftSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Left of fast rarefaction\n"
+          "Right State: Right of fast rarefaction\n"
+          "HLLD State: Right Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{-0.0059354802028144249, 0.44075681881443612,   -0.69194176811725872,
+                                           -0.0059354802028144804, -0.040194357552219451, 0.0,
+                                           -0.027710302430178135,  -0.70000000000000007};
+      std::vector<Real> const scalarFlux{-0.0065705619215052757, -0.013227920997059845, -0.019679168822056604};
+      Real thermalEnergyFlux             = -0.0059354109546219782;
+      std::vector<Real> const testFluxes = Compute_Fluxes(fastRareLeftSide, fastRareRightSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right of fast rarefaction\n"
+          "Right State: Left of fast rarefaction\n"
+          "HLLD State: Right Double Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{-3.0171858819483255e-05, 0.45503057873272706,     -0.69998654276213712,
+                                           -3.0171858819427744e-05, -0.00014827469339251387, 0.0,
+                                           -8.2898844654399895e-05, -0.69999999999999984};
+      std::vector<Real> const scalarFlux{-3.340017317660794e-05, -6.7241562798797897e-05, -0.00010003522597924373};
+      Real thermalEnergyFlux             = -3.000421709818028e-05;
+      std::vector<Real> const testFluxes = Compute_Fluxes(fastRareRightSide, fastRareLeftSide, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+  }
+}
+// =========================================================================
+
+// =========================================================================
+/*!
+ * \brief Test the HLLD Riemann Solver using various states and waves from
+ * the Einfeldt Strong Rarefaction (EFR)
+ *
+ */
+TEST_F(tMHDCalculateHLLDFluxesCUDA, EinfeldtStrongRarefactionCorrectInputExpectCorrectOutput)
+{
+  // Constant Values
+  Real const gamma = 5. / 3.;
+  Real const V0    = 2.;
+  Real const Vy    = 0.0;
+  Real const Vz    = 0.0;
+  Real const Bx    = 0.0;
+  Real const Bz    = 0.0;
+
+  std::vector<Real> const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875};
+
+  // States
+  std::vector<Real> const  // | Density | X-Velocity | Y-Velocity | Z-Velocity |
+                           // Pressure | X-Magnetic Field | Y-Magnetic Field |
+                           // Z-Magnetic Field | Adiabatic Index | Passive Scalars |
+      leftICs = Primitive_2_Conserved({1.0, -V0, Vy, Vz, 0.45, Bx, 0.5, Bz}, gamma, primitiveScalar),
+      leftRarefactionCenter =
+          Primitive_2_Conserved({0.368580, -1.180830, Vy, Vz, 0.111253, Bx, 0.183044, Bz}, gamma, primitiveScalar),
+      leftVxTurnOver =
+          Primitive_2_Conserved({0.058814, -0.125475, Vy, Vz, 0.008819, Bx, 0.029215, Bz}, gamma, primitiveScalar),
+      midPoint =
+          Primitive_2_Conserved({0.034658, 0.000778, Vy, Vz, 0.006776, Bx, 0.017333, Bz}, gamma, primitiveScalar),
+      rightVxTurnOver =
+          Primitive_2_Conserved({0.062587, 0.152160, Vy, Vz, 0.009521, Bx, 0.031576, Bz}, gamma, primitiveScalar),
+      rightRarefactionCenter =
+          Primitive_2_Conserved({0.316485, 1.073560, Vy, Vz, 0.089875, Bx, 0.159366, Bz}, gamma, primitiveScalar),
+      rightICs = Primitive_2_Conserved({1.0, V0, Vy, Vz, 0.45, Bx, 0.5, Bz}, gamma, primitiveScalar);
+
+  for (size_t direction = 0; direction < 3; direction++) {
+    // Initial Condition Checks
+    {
+      std::string const outputString{
+          "Left State:  Left Einfeldt Strong Rarefaction state\n"
+          "Right State: Left Einfeldt Strong Rarefaction state\n"
+          "HLLD State: Right"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{-2, 4.5750000000000002, -0, -0, -6.75, 0.0, -1, -0};
+      std::vector<Real> const scalarFlux{-2.2139950592000002, -4.4572370036000004, -6.6310283749999996};
+      Real thermalEnergyFlux             = -1.3499999999999996;
+      std::vector<Real> const testFluxes = Compute_Fluxes(leftICs, leftICs, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right Einfeldt Strong Rarefaction state\n"
+          "Right State: Right Einfeldt Strong Rarefaction state\n"
+          "HLLD State: Left"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{2, 4.5750000000000002, 0, 0, 6.75, 0.0, 1, 0};
+      std::vector<Real> const scalarFlux{2.2139950592000002, 4.4572370036000004, 6.6310283749999996};
+      Real thermalEnergyFlux             = 1.3499999999999996;
+      std::vector<Real> const testFluxes = Compute_Fluxes(rightICs, rightICs, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Left Einfeldt Strong Rarefaction state\n"
+          "Right State: Right Einfeldt Strong Rarefaction state\n"
+          "HLLD State: Left Star"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0, -1.4249999999999998, -0, -0, 0, 0.0, 0, -0};
+      std::vector<Real> const scalarFlux{0, 0, 0};
+      Real thermalEnergyFlux             = 0.0;
+      std::vector<Real> const testFluxes = Compute_Fluxes(leftICs, rightICs, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right Einfeldt Strong Rarefaction state\n"
+          "Right State: Left Einfeldt Strong Rarefaction state\n"
+          "HLLD State: Left Star"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0, 10.574999999999999, 0, 0, 0, 0.0, 0, 0};
+      std::vector<Real> const scalarFlux{0, 0, 0};
+      Real thermalEnergyFlux             = 0.0;
+      std::vector<Real> const testFluxes = Compute_Fluxes(rightICs, leftICs, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+
+    // Intermediate state checks
+    {
+      std::string const outputString{
+          "Left State:  Left Einfeldt Strong Rarefaction state\n"
+          "Right State: Left rarefaction center\n"
+          "HLLD State: Right"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{
+          -0.43523032140000006, 0.64193857338676208, -0, -0, -0.67142479846795033, 0.0, -0.21614384652000002, -0};
+      std::vector<Real> const scalarFlux{-0.48179889059681413, -0.9699623468164007, -1.4430123054318851};
+      Real thermalEnergyFlux             = -0.19705631998499995;
+      std::vector<Real> const testFluxes = Compute_Fluxes(leftICs, leftRarefactionCenter, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Left rarefaction center\n"
+          "Right State: Left Einfeldt Strong Rarefaction state\n"
+          "HLLD State: Right"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{-2, 4.5750000000000002, -0, -0, -6.75, 0.0, -1, -0};
+      std::vector<Real> const scalarFlux{-2.2139950592000002, -4.4572370036000004, -6.6310283749999996};
+      Real thermalEnergyFlux             = -1.3499999999999996;
+      std::vector<Real> const testFluxes = Compute_Fluxes(leftRarefactionCenter, leftICs, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Left rarefaction center\n"
+          "Right State: Left Vx turnover point\n"
+          "HLLD State: Right Star"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{
+          -0.023176056428381629, -2.0437812714100764e-05, 0, 0, -0.00098843768795337005, 0.0, -0.011512369309265979, 0};
+      std::vector<Real> const scalarFlux{-0.025655837212088663, -0.051650588155052128, -0.076840543898599858};
+      Real thermalEnergyFlux             = -0.0052127803322822184;
+      std::vector<Real> const testFluxes = Compute_Fluxes(leftRarefactionCenter, leftVxTurnOver, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Left Vx turnover point\n"
+          "Right State: Left rarefaction center\n"
+          "HLLD State: Right Star"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{
+          -0.43613091609689758, 0.64135749005731213, 0, 0, -0.67086080671260462, 0.0, -0.21659109937066717, 0};
+      std::vector<Real> const scalarFlux{-0.48279584670145054, -0.9719694288205295, -1.445998239926636};
+      Real thermalEnergyFlux             = -0.19746407621898149;
+      std::vector<Real> const testFluxes = Compute_Fluxes(leftVxTurnOver, leftRarefactionCenter, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Left Vx turnover point\n"
+          "Right State: Midpoint\n"
+          "HLLD State: Right Star"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{
+          -0.0011656375857387598, 0.0062355370788444902, 0, 0, -0.00055517615333601446, 0.0, -0.0005829533231464588, 0};
+      std::vector<Real> const scalarFlux{-0.0012903579278217153, -0.0025977614899708843, -0.0038646879530001054};
+      Real thermalEnergyFlux             = -0.00034184143405415065;
+      std::vector<Real> const testFluxes = Compute_Fluxes(leftVxTurnOver, midPoint, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Midpoint\n"
+          "Right State: Left Vx turnover point\n"
+          "HLLD State: Right Star"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{
+          -0.0068097924351817191, 0.010501781004354172, 0, 0, -0.0027509360975397175, 0.0, -0.0033826654536986789, 0};
+      std::vector<Real> const scalarFlux{-0.0075384234028349319, -0.015176429414463658, -0.022577963432775162};
+      Real thermalEnergyFlux             = -0.001531664896602873;
+      std::vector<Real> const testFluxes = Compute_Fluxes(midPoint, leftVxTurnOver, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Midpoint\n"
+          "Right State: Right Vx turnover point\n"
+          "HLLD State: Left Star"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{
+          0.0013952100758668729, 0.0061359407125797273, 0, 0, 0.00065984543596031629, 0.0, 0.00069776606396793105, 0};
+      std::vector<Real> const scalarFlux{0.001544494107257657, 0.0031093909889746947, 0.0046258388010795683};
+      Real thermalEnergyFlux             = 0.00040916715364737997;
+      std::vector<Real> const testFluxes = Compute_Fluxes(midPoint, rightVxTurnOver, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right Vx turnover point\n"
+          "Right State: Midpoint\n"
+          "HLLD State: Left Star"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{
+          0.0090024688079190333, 0.011769373146023688, 0, 0, 0.003725251767222792, 0.0, 0.0045418689996141555, 0};
+      std::vector<Real> const scalarFlux{0.0099657107306674268, 0.020063068547205749, 0.029847813055181766};
+      Real thermalEnergyFlux             = 0.0020542406295284269;
+      std::vector<Real> const testFluxes = Compute_Fluxes(rightVxTurnOver, midPoint, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right Vx turnover point\n"
+          "Right State: Right rarefaction center\n"
+          "HLLD State: Left Star"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{
+          0.023310393229073981, 0.0033086897645311728, 0, 0, 0.0034208520409618887, 0.0, 0.011760413130542123, 0};
+      std::vector<Real> const scalarFlux{0.025804547718589466, 0.051949973634547723, 0.077285939467198722};
+      Real thermalEnergyFlux             = 0.0053191138878843835;
+      std::vector<Real> const testFluxes = Compute_Fluxes(rightVxTurnOver, rightRarefactionCenter, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right rarefaction center\n"
+          "Right State: Right Vx turnover point\n"
+          "HLLD State: Left Star"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{
+          0.33914253809565298, 0.46770133685446141, 0, 0, 0.46453338019960133, 0.0, 0.17077520175095764, 0};
+      std::vector<Real> const scalarFlux{0.37542995185416178, 0.75581933514738364, 1.1244318966408966};
+      Real thermalEnergyFlux             = 0.1444638874418068;
+      std::vector<Real> const testFluxes = Compute_Fluxes(rightRarefactionCenter, rightVxTurnOver, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right rarefaction center\n"
+          "Right State: Right Einfeldt Strong Rarefaction state\n"
+          "HLLD State: Left"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{
+          0.33976563660000003, 0.46733255780629601, 0, 0, 0.46427650313257612, 0.0, 0.17108896296000001, 0};
+      std::vector<Real> const scalarFlux{0.37611972035917141, 0.75720798400261535, 1.1264977885722693};
+      Real thermalEnergyFlux             = 0.14472930749999999;
+      std::vector<Real> const testFluxes = Compute_Fluxes(rightRarefactionCenter, rightICs, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Right Einfeldt Strong Rarefaction state\n"
+          "Right State: Right rarefaction center\n"
+          "HLLD State: Left"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{2, 4.5750000000000002, 0, 0, 6.75, 0.0, 1, 0};
+      std::vector<Real> const scalarFlux{2.2139950592000002, 4.4572370036000004, 6.6310283749999996};
+      Real thermalEnergyFlux             = 1.3499999999999996;
+      std::vector<Real> const testFluxes = Compute_Fluxes(rightICs, rightRarefactionCenter, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+  }
+}
+// =========================================================================
+
+// =========================================================================
+/*!
+ * \brief Test the HLLD Riemann Solver using the constant states from the
+ * examples in cholla/examples/3D
+ *
+ */
+TEST_F(tMHDCalculateHLLDFluxesCUDA, ConstantStatesExpectCorrectFlux)
+{
+  // Constant Values
+  Real const gamma = 5. / 3.;
+
+  std::vector<Real> const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875};
+
+  // States
+  std::vector<Real> const  // | Density | X-Velocity | Y-Velocity | Z-Velocity |
+                           // Pressure    | X-Magnetic Field | Y-Magnetic Field |
+                           // Z-Magnetic Field | Adiabatic Index | Passive Scalars |
+      zeroMagneticField =
+          Primitive_2_Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 0.0, 0.0, 0.0}, gamma, primitiveScalar),
+      onesMagneticField =
+          Primitive_2_Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 1.0, 1.0, 1.0}, gamma, primitiveScalar);
+
+  for (size_t direction = 0; direction < 3; direction++) {
+    {
+      std::string const outputString{
+          "Left State:  Constant state, zero magnetic field\n"
+          "Right State: Constant state, zero magnetic field\n"
+          "HLLD State: Left Star"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{0, 1.380658e-05, 0, 0, 0, 0, 0, 0};
+      std::vector<Real> const scalarFlux{0, 0, 0};
+      Real thermalEnergyFlux             = 0.;
+      std::vector<Real> const testFluxes = Compute_Fluxes(zeroMagneticField, zeroMagneticField, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Constant state, ones magnetic field\n"
+          "Right State: Constant state, ones magnetic field\n"
+          "HLLD State: Left Double Star"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{
+          -1.42108547152020037174e-14, 0.50001380657999994,   -1, -1, -1.7347234759768071e-18, 0.0,
+          3.4694469519536142e-18,      3.4694469519536142e-18};
+      std::vector<Real> const scalarFlux{1.5731381063233131e-14, 3.1670573744690958e-14, 4.7116290424753513e-14};
+      Real thermalEnergyFlux             = 0.;
+      std::vector<Real> const testFluxes = Compute_Fluxes(onesMagneticField, onesMagneticField, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+  }
+}
+// =========================================================================
+
+// =========================================================================
+/*!
+ * \brief Test the HLLD Riemann Solver with the degenerate state
+ *
+ */
+TEST_F(tMHDCalculateHLLDFluxesCUDA, DegenerateStateCorrectInputExpectCorrectOutput)
+{
+  // Constant Values
+  Real const gamma = 5. / 3.;
+  std::vector<Real> const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875};
+
+  // State
+  std::vector<Real> const  // | Density | X-Velocity | Y-Velocity | Z-Velocity |
+                           // Pressure | X-Magnetic Field | Y-Magnetic Field |
+                           // Z-Magnetic Field | Adiabatic Index | Passive
+                           // Scalars |
+      state = Primitive_2_Conserved({1.0, 1.0, 1.0, 1.0, 1.0, 3.0E4, 1.0, 1.0}, gamma, primitiveScalar);
+
+  std::vector<Real> const fiducialFlux{1, -449999997, -29999, -29999, -59994, 0.0, -29999, -29999};
+  std::vector<Real> const scalarFlux{1.1069975296000001, 2.2286185018000002, 3.3155141874999998};
+  Real thermalEnergyFlux = 1.5;
+  std::string const outputString{
+      "Left State:  Degenerate state\n"
+      "Right State: Degenerate state\n"
+      "HLLD State: Left Double Star State"};
+
+  // Compute the fluxes and check for correctness
+  // Order of Fluxes is rho, vec(V), E, vec(B)
+  // If you run into issues with the energy try 0.001953125 instead.
+  // That's what I got when running the Athena solver on its own. Running
+  // the Athena solver with theses tests gave me -0.00080700946455175148
+  // though
+  for (size_t direction = 0; direction < 3; direction++) {
+    std::vector<Real> const testFluxes = Compute_Fluxes(state, state, gamma, direction);
+    Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+  }
+}
+// =========================================================================
+
+// =========================================================================
+/*!
+ * \brief Test the HLLD Riemann Solver with all zeroes
+ *
+ */
+TEST_F(tMHDCalculateHLLDFluxesCUDA, AllZeroesExpectAllZeroes)
+{
+  // Constant Values
+  Real const gamma = 5. / 3.;
+
+  // State
+  size_t numElements = 8;
+  #ifdef SCALAR
+  numElements += 3;
+  #endif  // SCALAR
+
+  std::vector<Real> const state(numElements, 0.0);
+  std::vector<Real> const fiducialFlux(8, 0.0);
+  std::vector<Real> const scalarFlux(3, 0.0);
+  Real thermalEnergyFlux = 0.0;
+
+  std::string const outputString{
+      "Left State:  All zeroes\n"
+      "Right State: All zeroes\n"
+      "HLLD State: Right Star State"};
+
+  for (size_t direction = 0; direction < 3; direction++) {
+    // Compute the fluxes and check for correctness
+    // Order of Fluxes is rho, vec(V), E, vec(B)
+    std::vector<Real> const testFluxes = Compute_Fluxes(state, state, gamma, direction);
+    Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+  }
+}
+// =========================================================================
+
+// =========================================================================
+/*!
+* \brief Test the HLLD Riemann Solver with negative pressure, energy, and
+  density.
+*
+*/
+TEST_F(tMHDCalculateHLLDFluxesCUDA, UnphysicalValuesExpectAutomaticFix)
+{
+  // Constant Values
+  Real const gamma = 5. / 3.;
+
+  // States
+  std::vector<Real>  // | Density | X-Momentum | Y-Momentum | Z-Momentum |
+                     // Energy   | X-Magnetic Field | Y-Magnetic Field |
+                     // Z-Magnetic Field | Adiabatic Index | Passive Scalars |
+      negativePressure              = {1.0, 1.0, 1.0, 1.0, 1.5, 1.0, 1.0, 1.0},
+      negativeEnergy                = {1.0, 1.0, 1.0, 1.0, -(5 - gamma), 1.0, 1.0, 1.0},
+      negativeDensity               = {-1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0},
+      negativeDensityEnergyPressure = {-1.0, -1.0, -1.0, -1.0, -gamma, 1.0, 1.0, 1.0},
+      negativeDensityPressure       = {-1.0, 1.0, 1.0, 1.0, -1.0, 1.0, 1.0, 1.0};
+
+  #ifdef SCALAR
+  std::vector<Real> const conservedScalar{1.1069975296, 2.2286185018, 3.3155141875};
+  negativePressure.insert(negativePressure.begin() + 5, conservedScalar.begin(),
+                          conservedScalar.begin() + grid_enum::nscalars);
+  negativeEnergy.insert(negativeEnergy.begin() + 5, conservedScalar.begin(),
+                        conservedScalar.begin() + grid_enum::nscalars);
+  negativeDensity.insert(negativeDensity.begin() + 5, conservedScalar.begin(),
+                         conservedScalar.begin() + grid_enum::nscalars);
+  negativeDensityEnergyPressure.insert(negativeDensityEnergyPressure.begin() + 5, conservedScalar.begin(),
+                                       conservedScalar.begin() + grid_enum::nscalars);
+  negativeDensityPressure.insert(negativeDensityPressure.begin() + 5, conservedScalar.begin(),
+                                 conservedScalar.begin() + grid_enum::nscalars);
+  #endif  // SCALAR
+  #ifdef DE
+  negativePressure.push_back(mhd::utils::computeThermalEnergy(
+      negativePressure.at(4), negativePressure.at(0), negativePressure.at(1), negativePressure.at(2),
+      negativePressure.at(3), negativePressure.at(grid_enum::magnetic_x), negativePressure.at(grid_enum::magnetic_y),
+      negativePressure.at(grid_enum::magnetic_z), gamma));
+  negativeEnergy.push_back(mhd::utils::computeThermalEnergy(
+      negativeEnergy.at(4), negativeEnergy.at(0), negativeEnergy.at(1), negativeEnergy.at(2), negativeEnergy.at(3),
+      negativeEnergy.at(grid_enum::magnetic_x), negativeEnergy.at(grid_enum::magnetic_y),
+      negativeEnergy.at(grid_enum::magnetic_z), gamma));
+  negativeDensity.push_back(mhd::utils::computeThermalEnergy(
+      negativeDensity.at(4), negativeDensity.at(0), negativeDensity.at(1), negativeDensity.at(2), negativeDensity.at(3),
+      negativeDensity.at(grid_enum::magnetic_x), negativeDensity.at(grid_enum::magnetic_y),
+      negativeDensity.at(grid_enum::magnetic_z), gamma));
+  negativeDensityEnergyPressure.push_back(mhd::utils::computeThermalEnergy(
+      negativeDensityEnergyPressure.at(4), negativeDensityEnergyPressure.at(0), negativeDensityEnergyPressure.at(1),
+      negativeDensityEnergyPressure.at(2), negativeDensityEnergyPressure.at(3),
+      negativeDensityEnergyPressure.at(grid_enum::magnetic_x), negativeDensityEnergyPressure.at(grid_enum::magnetic_y),
+      negativeDensityEnergyPressure.at(grid_enum::magnetic_z), gamma));
+  negativeDensityPressure.push_back(mhd::utils::computeThermalEnergy(
+      negativeDensityPressure.at(4), negativeDensityPressure.at(0), negativeDensityPressure.at(1),
+      negativeDensityPressure.at(2), negativeDensityPressure.at(3), negativeDensityPressure.at(grid_enum::magnetic_x),
+      negativeDensityPressure.at(grid_enum::magnetic_y), negativeDensityPressure.at(grid_enum::magnetic_z), gamma));
+  #endif  // DE
+
+  for (size_t direction = 0; direction < 3; direction++) {
+    {
+      std::string const outputString{
+          "Left State:  Negative Pressure\n"
+          "Right State: Negative Pressure\n"
+          "HLLD State: Left Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{1, 1.5, 0, 0, -1.6254793235168146e-16, 0, 0, 0};
+      std::vector<Real> const scalarFlux{1.1069975296000001, 2.2286185018000002, 3.3155141874999998};
+      Real thermalEnergyFlux             = -1.5;
+      std::vector<Real> const testFluxes = Compute_Fluxes(negativePressure, negativePressure, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Negative Energy\n"
+          "Right State: Negative Energy\n"
+          "HLLD State: Left Star State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{1, 1.5, 0, 0, -1.5, 0, 0, 0};
+      std::vector<Real> const scalarFlux{1.1069975296000001, 2.2286185018000002, 3.3155141874999998};
+      Real thermalEnergyFlux             = -6.333333333333333;
+      std::vector<Real> const testFluxes = Compute_Fluxes(negativeEnergy, negativeEnergy, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Negative Density\n"
+          "Right State: Negative Density\n"
+          "HLLD State: Left State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{1, 1E+20, 1e+20, 1e+20, -5e+19, 0, 0, 0};
+      std::vector<Real> const scalarFlux{1.1069975296000002e+20, 2.2286185018000002e+20, 3.3155141874999997e+20};
+      Real thermalEnergyFlux             = -1.5000000000000001e+40;
+      std::vector<Real> const testFluxes = Compute_Fluxes(negativeDensity, negativeDensity, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Negative Density, Energy, and Pressure\n"
+          "Right State: Negative Density, Energy, and Pressure\n"
+          "HLLD State: Right State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{-1, 1E+20, 1E+20, 1E+20, 1.5E+20, 0, 0, 0};
+      std::vector<Real> const scalarFlux{-1.1069975296000002e+20, -2.2286185018000002e+20, -3.3155141874999997e+20};
+      Real thermalEnergyFlux = 1.5000000000000001e+40;
+      std::vector<Real> const testFluxes =
+          Compute_Fluxes(negativeDensityEnergyPressure, negativeDensityEnergyPressure, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+    {
+      std::string const outputString{
+          "Left State:  Negative Density and Pressure\n"
+          "Right State: Negative Density and Pressure\n"
+          "HLLD State: Left State"};
+      // Compute the fluxes and check for correctness
+      // Order of Fluxes is rho, vec(V), E, vec(B)
+      std::vector<Real> const fiducialFlux{1, 1e+20, 1e+20, 1e+20, -1.5e+20, 0, 0, 0};
+      std::vector<Real> const scalarFlux{1.1069975296000002e+20, 2.2286185018000002e+20, 3.3155141874999997e+20};
+      Real thermalEnergyFlux = -1.5000000000000001e+40;
+      std::vector<Real> const testFluxes =
+          Compute_Fluxes(negativeDensityPressure, negativeDensityPressure, gamma, direction);
+      Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction);
+    }
+  }
+}
+// =========================================================================
+
+// =========================================================================
+// End of integration tests for the entire HLLD solver. Unit tests are below
+// =========================================================================
+
+// =========================================================================
+// Unit tests for the contents of the mhd::internal namespace
+// =========================================================================
+/*!
+ * \brief A struct to hold some basic test values
+ *
+ */
+namespace
+{
+struct TestParams {
+  // List of cases
+  std::vector<std::string> names{"Case 1", "Case 2"};
+
+  double const gamma = 5. / 3.;
+
+  std::vector<double> const magneticX{92.75101068883114, 31.588767769990532};
+
+  std::vector<mhd::internal::State> stateLVec{
+      {21.50306776645775, 1.7906564444824999, 0.33040135813215948, 1.500111692877206, 65.751208381099417,
+       12.297499156516622, 46.224045698787776, 9.9999999999999995e-21, 5445.3204350339083},
+      {48.316634031589935, 0.39291118391272883, 0.69876195899931859, 1.8528943583250035, 38.461354599479826,
+       63.744719695704063, 37.703264551707541, 9.9999999999999995e-21, 3241.38784808316}},
+      stateRVec{{81.121773176226498, 0.10110493143718589, 0.17103629446142521, 0.41731155351794952, 18.88982523270516,
+                 84.991914178754897, 34.852095153095384, 9.9999999999999995e-21, 8605.4286125143772},
+                {91.029557388536347, 0.93649399297774782, 0.36277769000180521, 0.095181318599791204, 83.656397841788944,
+                 35.910258841630984, 24.052685003977757, 9.9999999999999995e-21, 4491.7524579462979}};
+
+  std::vector<mhd::internal::StarState> const starStateLVec{
+      {28.520995251761526, 1.5746306813243216, 1.3948193325212686, 6.579867455284738, 62.093488291430653,
+       62.765890944643196},
+      {54.721668215064945, 1.4363926014039052, 1.1515754515491903, 30.450436649083692, 54.279167444036723,
+       93.267654555096414}},
+      starStateRVec{{49.090695707386047, 1.0519818825796206, 0.68198273634686157, 90.44484278669114, 26.835645069149873,
+                     7.4302316959173442},
+                    {72.680005044606091, 0.61418047569879897, 0.71813570322922715, 61.33664731346812,
+                     98.974446283273181, 10.696380763901459}};
+
+  std::vector<double> totalPressureStar{66.80958736783934, 72.29644038317676};
+
+  std::vector<mhd::internal::DoubleStarState> const DoubleStarStateVec{
+      {0.79104271107837087, 0.97609103551927523, 20.943239839455895, 83.380243826880701, 45.832024557076693,
+       std::nan("0")},
+      {1.390870320696683, 0.52222643241336986, 83.851481048702098, 80.366712517307832, 55.455301414557297,
+       std::nan("0")}};
+
+  std::vector<mhd::internal::Flux> const flux{
+      {12.939239309626116, 65.054814649176265, 73.676928455867824, 16.873647595664387, 52.718887319724693,
+       58.989284454159673, 29.976925743532302},
+      {81.715245865170729, 56.098850697078028, 2.7172469834037871, 39.701329831928732, 81.63926176158796,
+       57.043444592213589, 97.733298271413588}},
+      starFlux{{0, 74.90125547448865, 16.989138610622945, 38.541822734846185, 19.095105176247017, 96.239645266242775,
+                86.225169282683467},
+               {0, 26.812722601652684, 48.349566649914976, 61.228439610525378, 45.432249733131123, 33.053375365947957,
+                15.621020824107379}};
+
+  std::vector<mhd::internal::Speeds> const speed{
+      {-22.40376497145191, -19.710500632936679, -0.81760587897407833, 9.6740190040662242, 24.295526347371595},
+      {-11.190385012513822, -4.4880642018724357, -0.026643804611559244, 3.4191202933087519, 12.519790189404299}};
+
+  TestParams() = default;
+};
+}  // namespace
+// =========================================================================
+
+// =========================================================================
+/*!
+ * \brief Test the mhd::internal::approximateLRWaveSpeeds function
+ *
+ */
+TEST(tMHDHlldInternalApproximateLRWaveSpeeds, CorrectInputExpectCorrectOutput)
+{
+  TestParams const parameters;
+  std::vector<double> const fiducialSpeedL{-22.40376497145191, -11.190385012513822};
+  std::vector<double> const fiducialSpeedR{24.295526347371595, 12.519790189404299};
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    mhd::internal::Speeds testSpeed = mhd::internal::approximateLRWaveSpeeds(
+        parameters.stateLVec.at(i), parameters.stateRVec.at(i), parameters.magneticX.at(i), parameters.gamma);
+
+    // Now check results
+    testing_utilities::Check_Results(fiducialSpeedL[i], testSpeed.L, parameters.names.at(i) + ", SpeedL");
+    testing_utilities::Check_Results(fiducialSpeedR.at(i), testSpeed.R, parameters.names.at(i) + ", SpeedR");
+  }
+}
+// =========================================================================
+
+// =========================================================================
+/*!
+ * \brief Test the mhd::internal::approximateMiddleWaveSpeed function
+ *
+ */
+TEST(tMHDHlldInternalApproximateMiddleWaveSpeed, CorrectInputExpectCorrectOutput)
+{
+  TestParams const parameters;
+
+  std::vector<double> const fiducialSpeedM{-0.81760587897407833, -0.026643804611559244};
+
+  mhd::internal::Speeds testSpeed;
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    testSpeed.M = mhd::internal::approximateMiddleWaveSpeed(parameters.stateLVec.at(i), parameters.stateRVec.at(i),
+                                                            parameters.speed.at(i));
+
+    // Now check results
+    testing_utilities::Check_Results(fiducialSpeedM.at(i), testSpeed.M, parameters.names.at(i) + ", SpeedM");
+  }
+}
+// =========================================================================
+
+// =========================================================================
+/*!
+ * \brief Test the mhd::internal::approximateStarWaveSpeed function
+ *
+ */
+TEST(tMHDHlldInternalApproximateStarWaveSpeed, CorrectInputExpectCorrectOutput)
+{
+  TestParams const parameters;
+  std::vector<double> const fiducialSpeedStarL{-18.18506608966894, -4.2968910457518161};
+  std::vector<double> const fiducialSpeedStarR{12.420292938368167, 3.6786718447209252};
+
+  mhd::internal::Speeds testSpeed;
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    testSpeed.LStar = mhd::internal::approximateStarWaveSpeed(parameters.starStateLVec.at(i), parameters.speed.at(i),
+                                                              parameters.magneticX.at(i), -1);
+    testSpeed.RStar = mhd::internal::approximateStarWaveSpeed(parameters.starStateRVec.at(i), parameters.speed.at(i),
+                                                              parameters.magneticX.at(i), 1);
+
+    // Now check results
+    testing_utilities::Check_Results(fiducialSpeedStarL.at(i), testSpeed.LStar,
+                                     parameters.names.at(i) + ", SpeedStarL");
+    testing_utilities::Check_Results(fiducialSpeedStarR.at(i), testSpeed.RStar,
+                                     parameters.names.at(i) + ", SpeedStarR");
+  }
+}
+// =========================================================================
+
+// =========================================================================
+/*!
+ * \brief Test the mhd::internal::_nonStarFluxes function
+ *
+ */
+TEST(tMHDHlldInternalNonStarFluxes, CorrectInputExpectCorrectOutput)
+{
+  TestParams const parameters;
+
+  std::vector<mhd::internal::Flux> fiducialFlux{
+      {38.504606872151484, -3088.4810263278778, -1127.8835013070616, -4229.5657456907293, -12344.460641662206,
+       -8.6244637840856555, -56.365490339906408},
+      {18.984145880030045, 2250.9966820900618, -2000.3517480656785, -1155.8240512956793, -2717.2127176227905,
+       2.9729840344910059, -43.716615275067923}};
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    mhd::internal::Flux testFlux = mhd::internal::nonStarFluxes(parameters.stateLVec.at(i), parameters.magneticX.at(i));
+
+    // Now check results
+    testing_utilities::Check_Results(fiducialFlux[i].density, testFlux.density,
+                                     parameters.names.at(i) + ", DensityFlux");
+    testing_utilities::Check_Results(fiducialFlux[i].momentumX, testFlux.momentumX,
+                                     parameters.names.at(i) + ", MomentumFluxX");
+    testing_utilities::Check_Results(fiducialFlux[i].momentumY, testFlux.momentumY,
+                                     parameters.names.at(i) + ", MomentumFluxY");
+    testing_utilities::Check_Results(fiducialFlux[i].momentumZ, testFlux.momentumZ,
+                                     parameters.names.at(i) + ", MomentumFluxZ");
+    testing_utilities::Check_Results(fiducialFlux[i].magneticY, testFlux.magneticY,
+                                     parameters.names.at(i) + ", MagneticFluxY");
+    testing_utilities::Check_Results(fiducialFlux[i].magneticZ, testFlux.magneticZ,
+                                     parameters.names.at(i) + ", MagneticFluxZ");
+    testing_utilities::Check_Results(fiducialFlux[i].energy, testFlux.energy, parameters.names.at(i) + ", EnergyFlux");
+  }
+}
+// =========================================================================
+
+// =========================================================================
+/*!
+ * \brief Test the mhd::internal::computeStarState function in the
+ * non-degenerate case
+ *
+ */
+TEST(tMHDHlldInternalComputeStarState, CorrectInputNonDegenerateExpectCorrectOutput)
+{
+  TestParams const parameters;
+
+  std::vector<mhd::internal::StarState> fiducialStarState{{24.101290139122913, 1.4626377138501221, 5.7559806612277464,
+                                                           1023.8840191068900, 18.648382121236992, 70.095850905078336},
+                                                          {50.132466596958501, 0.85967712862308099, 1.9480712959548112,
+                                                           172.06840532772659, 66.595692901872582, 39.389537509454122}};
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    mhd::internal::StarState testStarState =
+        mhd::internal::computeStarState(parameters.stateLVec.at(i), parameters.speed.at(i), parameters.speed.at(i).L,
+                                        parameters.magneticX.at(i), parameters.totalPressureStar.at(i));
+
+    // Now check results
+    testing_utilities::Check_Results(fiducialStarState.at(i).velocityY, testStarState.velocityY,
+                                     parameters.names.at(i) + ", VelocityStarY");
+    testing_utilities::Check_Results(fiducialStarState.at(i).velocityZ, testStarState.velocityZ,
+                                     parameters.names.at(i) + ", VelocityStarZ");
+    testing_utilities::Check_Results(fiducialStarState.at(i).energy, testStarState.energy,
+                                     parameters.names.at(i) + ", EnergyStar");
+    testing_utilities::Check_Results(fiducialStarState.at(i).magneticY, testStarState.magneticY,
+                                     parameters.names.at(i) + ", MagneticStarY");
+    testing_utilities::Check_Results(fiducialStarState.at(i).magneticZ, testStarState.magneticZ,
+                                     parameters.names.at(i) + ", MagneticStarZ");
+  }
+}
+
+/*!
+ * \brief Test the mhd::internal::starFluxes function in the non-degenerate
+ * case
+ *
+ */
+TEST(tMHDHlldInternalStarFluxes, CorrectInputNonDegenerateExpectCorrectOutput)
+{
+  TestParams const parameters;
+
+  std::vector<mhd::internal::Flux> fiducialFlux{
+      {-45.270724071132321, 1369.1771532285088, -556.91765728768155, -2368.4452742393819, -21413.063415617500,
+       -83.294404848633300, -504.84138754248409},
+      {61.395380340435793, 283.48596932136809, -101.75517013858293, -51.34364892516212, -1413.4750762739586,
+       25.139956754826922, 78.863254638038882}};
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    mhd::internal::StarState testStarState =
+        mhd::internal::computeStarState(parameters.stateLVec.at(i), parameters.speed.at(i), parameters.speed.at(i).L,
+                                        parameters.magneticX.at(i), parameters.totalPressureStar.at(i));
+
+    mhd::internal::Flux testFlux =
+        mhd::internal::starFluxes(testStarState, parameters.stateLVec.at(i), parameters.flux.at(i),
+                                  parameters.speed.at(i), parameters.speed.at(i).L);
+
+    // Now check results
+    testing_utilities::Check_Results(fiducialFlux[i].density, testFlux.density,
+                                     parameters.names.at(i) + ", DensityStarFlux");
+    testing_utilities::Check_Results(fiducialFlux[i].momentumX, testFlux.momentumX,
+                                     parameters.names.at(i) + ", MomentumStarFluxX");
+    testing_utilities::Check_Results(fiducialFlux[i].momentumY, testFlux.momentumY,
+                                     parameters.names.at(i) + ", MomentumStarFluxY");
+    testing_utilities::Check_Results(fiducialFlux[i].momentumZ, testFlux.momentumZ,
+                                     parameters.names.at(i) + ", MomentumStarFluxZ");
+    testing_utilities::Check_Results(fiducialFlux[i].energy, testFlux.energy,
+                                     parameters.names.at(i) + ", EnergyStarFlux");
+    testing_utilities::Check_Results(fiducialFlux[i].magneticY, testFlux.magneticY,
+                                     parameters.names.at(i) + ", MagneticStarFluxY", 1.0E-13);
+    testing_utilities::Check_Results(fiducialFlux[i].magneticZ, testFlux.magneticZ,
+                                     parameters.names.at(i) + ", MagneticStarFluxZ", 7.0E-13);
+  }
+}
+
+/*!
+ * \brief Test the mhd::internal::starFluxes function in the degenerate
+ * case
+ *
+ */
+TEST(tMHDHlldInternalComputeStarState, CorrectInputDegenerateExpectCorrectOutput)
+{
+  TestParams parameters;
+
+  std::vector<mhd::internal::StarState> fiducialStarState{
+      {24.101290139122913, 1.4626377138501221, 5.7559806612277464, 4.5171065808847731e+17, 18.648382121236992,
+       70.095850905078336},
+      {50.132466596958501, 0.85967712862308099, 1.9480712959548112, 172.06840532772659, 66.595692901872582,
+       39.389537509454122}};
+
+  // Used to get us into the degenerate case
+  double const totalPressureStarMultiplier = 1E15;
+  parameters.stateLVec.at(0).totalPressure *= totalPressureStarMultiplier;
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    mhd::internal::StarState testStarState =
+        mhd::internal::computeStarState(parameters.stateLVec.at(i), parameters.speed.at(i), parameters.speed.at(i).L,
+                                        parameters.magneticX.at(i), parameters.totalPressureStar.at(i));
+
+    // Now check results
+    testing_utilities::Check_Results(fiducialStarState.at(i).velocityY, testStarState.velocityY,
+                                     parameters.names.at(i) + ", VelocityStarY");
+    testing_utilities::Check_Results(fiducialStarState.at(i).velocityZ, testStarState.velocityZ,
+                                     parameters.names.at(i) + ", VelocityStarZ");
+    testing_utilities::Check_Results(fiducialStarState.at(i).energy, testStarState.energy,
+                                     parameters.names.at(i) + ", EnergyStar");
+    testing_utilities::Check_Results(fiducialStarState.at(i).magneticY, testStarState.magneticY,
+                                     parameters.names.at(i) + ", MagneticStarY");
+    testing_utilities::Check_Results(fiducialStarState.at(i).magneticZ, testStarState.magneticZ,
+                                     parameters.names.at(i) + ", MagneticStarZ");
+  }
+}
+
+TEST(tMHDHlldInternalStarFluxes, CorrectInputDegenerateExpectCorrectOutput)
+{
+  TestParams parameters;
+
+  // Used to get us into the degenerate case
+  double const totalPressureStarMultiplier = 1E15;
+
+  std::vector<mhd::internal::Flux> fiducialFlux{
+      {-144.2887586578122, 1450.1348804310369, -773.30617492819886, -151.70644305354989, 1378.3797024673304,
+       -1056.6283526454272, -340.62268733874163},
+      {10.040447333773272, 284.85426012223729, -499.05932057162761, 336.35271628090368, 171.28451793017882,
+       162.96661864443826, -524.05361885198215}};
+
+  parameters.totalPressureStar.at(0) *= totalPressureStarMultiplier;
+  parameters.totalPressureStar.at(1) *= totalPressureStarMultiplier;
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    mhd::internal::Flux testFlux =
+        mhd::internal::starFluxes(parameters.starStateLVec.at(i), parameters.stateLVec.at(i), parameters.flux.at(i),
+                                  parameters.speed.at(i), parameters.speed.at(i).L);
+
+    // Now check results
+    testing_utilities::Check_Results(fiducialFlux[i].density, testFlux.density,
+                                     parameters.names.at(i) + ", DensityStarFlux");
+    testing_utilities::Check_Results(fiducialFlux[i].momentumX, testFlux.momentumX,
+                                     parameters.names.at(i) + ", MomentumStarFluxX");
+    testing_utilities::Check_Results(fiducialFlux[i].momentumY, testFlux.momentumY,
+                                     parameters.names.at(i) + ", MomentumStarFluxY");
+    testing_utilities::Check_Results(fiducialFlux[i].momentumZ, testFlux.momentumZ,
+                                     parameters.names.at(i) + ", MomentumStarFluxZ");
+    testing_utilities::Check_Results(fiducialFlux[i].energy, testFlux.energy,
+                                     parameters.names.at(i) + ", EnergyStarFlux");
+    testing_utilities::Check_Results(fiducialFlux[i].magneticY, testFlux.magneticY,
+                                     parameters.names.at(i) + ", MagneticStarFluxY");
+    testing_utilities::Check_Results(fiducialFlux[i].magneticZ, testFlux.magneticZ,
+                                     parameters.names.at(i) + ", MagneticStarFluxZ");
+  }
+}
+// =========================================================================
+
+// =========================================================================
+/*!
+ * \brief Test the mhd::internal::computeDoubleStarState function.
+ * Non-degenerate state
+ *
+ */
+TEST(tMHDHlldInternalDoubleStarState, CorrectInputNonDegenerateExpectCorrectOutput)
+{
+  TestParams const parameters;
+
+  std::vector<mhd::internal::DoubleStarState> fiducialState{
+      {-1.5775383335759607, -3.4914062207842482, 45.259313435283325, 36.670978215630669, -2048.1953674500523,
+       1721.0582276783819},
+      {3.803188977150934, -4.2662645349592765, 71.787329583230417, 53.189673238238178, -999.79694164635089,
+       252.047167522579}};
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    mhd::internal::DoubleStarState const testState = mhd::internal::computeDoubleStarState(
+        parameters.starStateLVec.at(i), parameters.starStateRVec.at(i), parameters.magneticX.at(i),
+        parameters.totalPressureStar.at(i), parameters.speed.at(i));
+
+    // Now check results
+    testing_utilities::Check_Results(fiducialState.at(i).velocityY, testState.velocityY,
+                                     parameters.names.at(i) + ", VelocityDoubleStarY");
+    testing_utilities::Check_Results(fiducialState.at(i).velocityZ, testState.velocityZ,
+                                     parameters.names.at(i) + ", VelocityDoubleStarZ");
+    testing_utilities::Check_Results(fiducialState.at(i).magneticY, testState.magneticY,
+                                     parameters.names.at(i) + ", MagneticDoubleStarY");
+    testing_utilities::Check_Results(fiducialState.at(i).magneticZ, testState.magneticZ,
+                                     parameters.names.at(i) + ", MagneticDoubleStarZ");
+    testing_utilities::Check_Results(fiducialState.at(i).energyL, testState.energyL,
+                                     parameters.names.at(i) + ", EnergyDoubleStarL");
+    testing_utilities::Check_Results(fiducialState.at(i).energyR, testState.energyR,
+                                     parameters.names.at(i) + ", EnergyDoubleStarR");
+  }
+}
+
+/*!
+ * \brief Test the mhd::internal::computeDoubleStarState function in the
+ * degenerate state.
+ *
+ */
+TEST(tMHDHlldInternalDoubleStarState, CorrectInputDegenerateExpectCorrectOutput)
+{
+  TestParams const parameters;
+
+  std::vector<mhd::internal::DoubleStarState> fiducialState{
+      {1.0519818825796206, 0.68198273634686157, 26.835645069149873, 7.4302316959173442, 0.0, 90.44484278669114},
+      {0.61418047569879897, 0.71813570322922715, 98.974446283273181, 10.696380763901459, 0.0, 61.33664731346812}};
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    mhd::internal::DoubleStarState const testState =
+        mhd::internal::computeDoubleStarState(parameters.starStateLVec.at(i), parameters.starStateRVec.at(i), 0.0,
+                                              parameters.totalPressureStar.at(i), parameters.speed.at(i));
+
+    // Now check results
+    testing_utilities::Check_Results(fiducialState.at(i).velocityY, testState.velocityY,
+                                     parameters.names.at(i) + ", VelocityDoubleStarY");
+    testing_utilities::Check_Results(fiducialState.at(i).velocityZ, testState.velocityZ,
+                                     parameters.names.at(i) + ", VelocityDoubleStarZ");
+    testing_utilities::Check_Results(fiducialState.at(i).magneticY, testState.magneticY,
+                                     parameters.names.at(i) + ", MagneticDoubleStarY");
+    testing_utilities::Check_Results(fiducialState.at(i).magneticZ, testState.magneticZ,
+                                     parameters.names.at(i) + ", MagneticDoubleStarZ");
+    testing_utilities::Check_Results(fiducialState.at(i).energyL, testState.energyL,
+                                     parameters.names.at(i) + ", EnergyDoubleStarL");
+    testing_utilities::Check_Results(fiducialState.at(i).energyR, testState.energyR,
+                                     parameters.names.at(i) + ", EnergyDoubleStarR");
+  }
+}
+// =========================================================================
+
+// =========================================================================
+/*!
+ * \brief Test the mhd::internal::_doubleStarFluxes function
+ *
+ */
+TEST(tMHDHlldInternalDoubleStarFluxes, CorrectInputExpectCorrectOutput)
+{
+  TestParams const parameters;
+
+  std::vector<mhd::internal::Flux> const fiducialFlux{
+      {-144.2887586578122, 1450.1348804310369, -332.80193639987715, 83.687152337186944, 604.70003506833029,
+       -245.53635448727721, -746.94190287166407},
+      {10.040447333773258, 284.85426012223729, -487.87930516727664, 490.91728596722157, 59.061079503595295,
+       30.244176588794346, -466.15336272175193}};
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    mhd::internal::Flux const testFlux = mhd::internal::computeDoubleStarFluxes(
+        parameters.DoubleStarStateVec.at(i), parameters.DoubleStarStateVec.at(i).energyL,
+        parameters.starStateLVec.at(i), parameters.stateLVec.at(i), parameters.flux.at(i), parameters.speed.at(i),
+        parameters.speed.at(i).L, parameters.speed.at(i).LStar);
+
+    // Now check results
+    testing_utilities::Check_Results(fiducialFlux[i].density, testFlux.density,
+                                     parameters.names.at(i) + ", DensityStarFlux", 5.0E-14);
+    testing_utilities::Check_Results(fiducialFlux[i].momentumX, testFlux.momentumX,
+                                     parameters.names.at(i) + ", MomentumStarFluxX");
+    testing_utilities::Check_Results(fiducialFlux[i].momentumY, testFlux.momentumY,
+                                     parameters.names.at(i) + ", MomentumStarFluxY");
+    testing_utilities::Check_Results(fiducialFlux[i].momentumZ, testFlux.momentumZ,
+                                     parameters.names.at(i) + ", MomentumStarFluxZ");
+    testing_utilities::Check_Results(fiducialFlux[i].energy, testFlux.energy,
+                                     parameters.names.at(i) + ", EnergyStarFlux");
+    testing_utilities::Check_Results(fiducialFlux[i].magneticY, testFlux.magneticY,
+                                     parameters.names.at(i) + ", MagneticStarFluxY");
+    testing_utilities::Check_Results(fiducialFlux[i].magneticZ, testFlux.magneticZ,
+                                     parameters.names.at(i) + ", MagneticStarFluxZ");
+  }
+}
+// =========================================================================
+
+// =========================================================================
+/*!
+ * \brief Test the mhd::internal::_returnFluxes function
+ *
+ */
+TEST(tMHDHlldInternalReturnFluxes, CorrectInputExpectCorrectOutput)
+{
+  double const dummyValue = 999;
+  mhd::internal::Flux inputFlux{1, 2, 3, 4, 5, 6, 7};
+  mhd::internal::State inputState{8, 9, 10, 11, 12, 13, 14, 15, 16};
+
+  int threadId = 0;
+  int n_cells  = 10;
+  int nFields  = 8;  // Total number of conserved fields
+  #ifdef SCALAR
+  nFields += NSCALARS;
+  #endif  // SCALAR
+  #ifdef DE
+  nFields++;
+  #endif  // DE
+
+  // Lambda for finding indices and check if they're correct
+  auto findIndex = [](std::vector<double> const &vec, double const &num, int const &fidIndex, std::string const &name) {
+    int index = std::distance(vec.begin(), std::find(vec.begin(), vec.end(), num));
+    EXPECT_EQ(fidIndex, index) << "Error in " << name << " index" << std::endl;
+
+    return index;
+  };
+
+  for (size_t direction = 0; direction < 1; direction++) {
+    int o1, o2, o3;
+    switch (direction) {
+      case 0:
+        o1 = 1;
+        o2 = 2;
+        o3 = 3;
+        break;
+      case 1:
+        o1 = 2;
+        o2 = 3;
+        o3 = 1;
+        break;
+      case 2:
+        o1 = 3;
+        o2 = 1;
+        o3 = 2;
+        break;
+    }
+
+    std::vector<double> testFluxArray(nFields * n_cells, dummyValue);
+
+    // Fiducial Indices
+    int const fiducialDensityIndex   = threadId + n_cells * grid_enum::density;
+    int const fiducialMomentumIndexX = threadId + n_cells * o1;
+    int const fiducialMomentumIndexY = threadId + n_cells * o2;
+    int const fiducialMomentumIndexZ = threadId + n_cells * o3;
+    int const fiducialEnergyIndex    = threadId + n_cells * grid_enum::Energy;
+    int const fiducialMagneticYIndex = threadId + n_cells * (grid_enum::magnetic_x);
+    int const fiducialMagneticZIndex = threadId + n_cells * (grid_enum::magnetic_y);
+
+    mhd::internal::returnFluxes(threadId, o1, o2, o3, n_cells, testFluxArray.data(), inputFlux, inputState);
+
+    // Find the indices for the various fields
+    int densityLoc    = findIndex(testFluxArray, inputFlux.density, fiducialDensityIndex, "density");
+    int momentumXLocX = findIndex(testFluxArray, inputFlux.momentumX, fiducialMomentumIndexX, "momentum X");
+    int momentumYLocY = findIndex(testFluxArray, inputFlux.momentumY, fiducialMomentumIndexY, "momentum Y");
+    int momentumZLocZ = findIndex(testFluxArray, inputFlux.momentumZ, fiducialMomentumIndexZ, "momentum Z");
+    int energyLoc     = findIndex(testFluxArray, inputFlux.energy, fiducialEnergyIndex, "energy");
+    int magneticYLoc  = findIndex(testFluxArray, inputFlux.magneticY, fiducialMagneticYIndex, "magnetic Y");
+    int magneticZLoc  = findIndex(testFluxArray, inputFlux.magneticZ, fiducialMagneticZIndex, "magnetic Z");
+
+    for (size_t i = 0; i < testFluxArray.size(); i++) {
+      // Skip the already checked indices
+      if ((i != densityLoc) and (i != momentumXLocX) and (i != momentumYLocY) and (i != momentumZLocZ) and
+          (i != energyLoc) and (i != magneticYLoc) and (i != magneticZLoc)) {
+        EXPECT_EQ(dummyValue, testFluxArray.at(i)) << "Unexpected value at index that _returnFluxes shouldn't be "
+                                                      "touching"
+                                                   << std::endl
+                                                   << "Index     = " << i << std::endl
+                                                   << "Direction = " << direction << std::endl;
+      }
+    }
+  }
+}
+// =========================================================================
+
+// =========================================================================
+/*!
+ * \brief Test the mhd::internal::starTotalPressure function
+ *
+ */
+TEST(tMHDHlldInternalStarTotalPressure, CorrectInputExpectCorrectOutput)
+{
+  TestParams const parameters;
+
+  std::vector<double> const fiducialPressure{6802.2800807224075, 3476.1984612875144};
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    Real const testPressure = mhd::internal::starTotalPressure(parameters.stateLVec.at(i), parameters.stateRVec.at(i),
+                                                               parameters.speed.at(i));
+
+    // Now check results
+    testing_utilities::Check_Results(fiducialPressure.at(i), testPressure,
+                                     parameters.names.at(i) + ", total pressure in the star states");
+  }
+}
+// =========================================================================
+
+// =========================================================================
+/*!
+ * \brief Test the mhd::internal::loadState function
+ *
+ */
+TEST(tMHDHlldInternalLoadState, CorrectInputExpectCorrectOutput)
+{
+  TestParams const parameters;
+  int const threadId = 0;
+  int const n_cells  = 10;
+  std::vector<double> interfaceArray(n_cells * grid_enum::num_fields);
+  std::iota(std::begin(interfaceArray), std::end(interfaceArray), 1.);
+
+  std::vector<mhd::internal::State> const fiducialState{
+      {1, 11, 21, 31, 41, 51, 61, 9.9999999999999995e-21, 7462.3749918998346},
+      {1, 21, 31, 11, 41, 51, 61, 9.9999999999999995e-21, 7462.3749918998346},
+      {1, 31, 11, 21, 41, 51, 61, 9.9999999999999995e-21, 7462.3749918998346},
+  };
+
+  for (size_t direction = 0; direction < 3; direction++) {
+    int o1, o2, o3;
+    switch (direction) {
+      case 0:
+        o1 = 1;
+        o2 = 2;
+        o3 = 3;
+        break;
+      case 1:
+        o1 = 2;
+        o2 = 3;
+        o3 = 1;
+        break;
+      case 2:
+        o1 = 3;
+        o2 = 1;
+        o3 = 2;
+        break;
+    }
+
+    mhd::internal::State const testState = mhd::internal::loadState(interfaceArray.data(), parameters.magneticX.at(0),
+                                                                    parameters.gamma, threadId, n_cells, o1, o2, o3);
+
+    // Now check results
+    testing_utilities::Check_Results(fiducialState.at(direction).density, testState.density, ", Density");
+    testing_utilities::Check_Results(fiducialState.at(direction).velocityX, testState.velocityX, ", velocityX");
+    testing_utilities::Check_Results(fiducialState.at(direction).velocityY, testState.velocityY, ", velocityY");
+    testing_utilities::Check_Results(fiducialState.at(direction).velocityZ, testState.velocityZ, ", velocityZ");
+    testing_utilities::Check_Results(fiducialState.at(direction).energy, testState.energy, ", energy");
+    testing_utilities::Check_Results(fiducialState.at(direction).magneticY, testState.magneticY, ", magneticY");
+    testing_utilities::Check_Results(fiducialState.at(direction).magneticZ, testState.magneticZ, ", magneticZ");
+    testing_utilities::Check_Results(fiducialState.at(direction).gasPressure, testState.gasPressure, ", gasPressure");
+    testing_utilities::Check_Results(fiducialState.at(direction).totalPressure, testState.totalPressure,
+                                     ", totalPressure");
+  }
+}
+// =========================================================================
+#endif  // MHD
diff --git a/src/riemann_solvers/roe_cuda.cu b/src/riemann_solvers/roe_cuda.cu
index 88b094468..1735fe24d 100644
--- a/src/riemann_solvers/roe_cuda.cu
+++ b/src/riemann_solvers/roe_cuda.cu
@@ -1,36 +1,38 @@
 /*! \file roe_cuda.cu
  *  \brief Function definitions for the cuda Roe Riemann solver.*/
 
-#ifdef CUDA
-
-#include "../utils/gpu.hpp"
 #include <math.h>
+
 #include "../global/global.h"
 #include "../global/global_cuda.h"
 #include "../riemann_solvers/roe_cuda.h"
+#include "../utils/gpu.hpp"
 
-#ifdef DE //PRESSURE_DE
-#include "../utils/hydro_utilities.h"
+#ifdef DE  // PRESSURE_DE
+  #include "../utils/hydro_utilities.h"
 #endif
 
-/*! \fn Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, Real *dev_etah, int dir, int n_fields)
- *  \brief Roe Riemann solver based on the version described in Stone et al, 2008. */
-__global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields)
+/*! \fn Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real
+ * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, Real *dev_etah,
+ * int dir, int n_fields) \brief Roe Riemann solver based on the version
+ * described in Stone et al, 2008. */
+__global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny,
+                                          int nz, int n_ghost, Real gamma, int dir, int n_fields)
 {
   // get a thread index
-  int blockId = blockIdx.x + blockIdx.y*gridDim.x;
-  int tid = threadIdx.x + blockId * blockDim.x;
-  int zid = tid / (nx*ny);
-  int yid = (tid - zid*nx*ny) / nx;
-  int xid = tid - zid*nx*ny - yid*nx;
+  int blockId = blockIdx.x + blockIdx.y * gridDim.x;
+  int tid     = threadIdx.x + blockId * blockDim.x;
+  int zid     = tid / (nx * ny);
+  int yid     = (tid - zid * nx * ny) / nx;
+  int xid     = tid - zid * nx * ny - yid * nx;
 
-  int n_cells = nx*ny*nz;
+  int n_cells = nx * ny * nz;
 
   Real dl, vxl, mxl, vyl, myl, vzl, mzl, pl, El;
   Real dr, vxr, mxr, vyr, myr, vzr, mzr, pr, Er;
 
   Real etah = 0.0;
-  Real g1 = gamma - 1.0;
+  Real g1   = gamma - 1.0;
   Real Hl, Hr;
   Real sqrtdl, sqrtdr, vx, vy, vz, H;
   Real vsq, asq, a;
@@ -44,93 +46,99 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R
   sum_0 = sum_1 = sum_2 = sum_3 = sum_4 = 0.0;
   Real test0, test1, test2, test3, test4;
   int hlle_flag = 0;
-  #ifdef DE
+#ifdef DE
   Real dgel, gel, dger, ger, f_ge_l, f_ge_r, E_kin;
-  #endif
-  #ifdef SCALAR
-  Real dscalarl[NSCALARS], scalarl[NSCALARS], dscalarr[NSCALARS], scalarr[NSCALARS], f_scalar_l[NSCALARS], f_scalar_r[NSCALARS];
-  #endif
+#endif
+#ifdef SCALAR
+  Real dscalarl[NSCALARS], scalarl[NSCALARS], dscalarr[NSCALARS], scalarr[NSCALARS], f_scalar_l[NSCALARS],
+      f_scalar_r[NSCALARS];
+#endif
 
   int o1, o2, o3;
-  if (dir==0) {
-    o1 = 1; o2 = 2; o3 = 3;
+  if (dir == 0) {
+    o1 = 1;
+    o2 = 2;
+    o3 = 3;
   }
-  if (dir==1) {
-    o1 = 2; o2 = 3; o3 = 1;
+  if (dir == 1) {
+    o1 = 2;
+    o2 = 3;
+    o3 = 1;
   }
-  if (dir==2) {
-    o1 = 3; o2 = 1; o3 = 2;
+  if (dir == 2) {
+    o1 = 3;
+    o2 = 1;
+    o3 = 2;
   }
 
   // Each thread executes the solver independently
-  if (xid < nx && yid < ny && zid < nz)
-  {
+  if (xid < nx && yid < ny && zid < nz) {
     // retrieve conserved variables
-    dl  = dev_bounds_L[            tid];
-    mxl = dev_bounds_L[o1*n_cells + tid];
-    myl = dev_bounds_L[o2*n_cells + tid];
-    mzl = dev_bounds_L[o3*n_cells + tid];
-    El  = dev_bounds_L[4*n_cells + tid];
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dscalarl[i] = dev_bounds_L[(5+i)*n_cells + tid];
+    dl  = dev_bounds_L[tid];
+    mxl = dev_bounds_L[o1 * n_cells + tid];
+    myl = dev_bounds_L[o2 * n_cells + tid];
+    mzl = dev_bounds_L[o3 * n_cells + tid];
+    El  = dev_bounds_L[4 * n_cells + tid];
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dscalarl[i] = dev_bounds_L[(5 + i) * n_cells + tid];
     }
-    #endif
-    #ifdef DE
-    dgel = dev_bounds_L[(n_fields-1)*n_cells + tid];
-    #endif
-
-    dr  = dev_bounds_R[            tid];
-    mxr = dev_bounds_R[o1*n_cells + tid];
-    myr = dev_bounds_R[o2*n_cells + tid];
-    mzr = dev_bounds_R[o3*n_cells + tid];
-    Er  = dev_bounds_R[4*n_cells + tid];
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      dscalarr[i] = dev_bounds_R[(5+i)*n_cells + tid];
+#endif
+#ifdef DE
+    dgel = dev_bounds_L[(n_fields - 1) * n_cells + tid];
+#endif
+
+    dr  = dev_bounds_R[tid];
+    mxr = dev_bounds_R[o1 * n_cells + tid];
+    myr = dev_bounds_R[o2 * n_cells + tid];
+    mzr = dev_bounds_R[o3 * n_cells + tid];
+    Er  = dev_bounds_R[4 * n_cells + tid];
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      dscalarr[i] = dev_bounds_R[(5 + i) * n_cells + tid];
     }
-    #endif
-    #ifdef DE
-    dger = dev_bounds_R[(n_fields-1)*n_cells + tid];
-    #endif
+#endif
+#ifdef DE
+    dger = dev_bounds_R[(n_fields - 1) * n_cells + tid];
+#endif
 
     // calculate primitive variables
     vxl = mxl / dl;
     vyl = myl / dl;
     vzl = mzl / dl;
-    #ifdef DE //PRESSURE_DE
-    E_kin = 0.5 * dl * ( vxl*vxl + vyl*vyl + vzl*vzl );
-    pl = hydro_utilities::Get_Pressure_From_DE( El, El - E_kin, dgel, gamma );
-    #else
-    pl  = (El - 0.5*dl*(vxl*vxl + vyl*vyl + vzl*vzl)) * (gamma - 1.0);
-    #endif //PRESSURE_DE
-    pl  = fmax(pl, (Real) TINY_NUMBER);
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
+#ifdef DE  // PRESSURE_DE
+    E_kin = 0.5 * dl * (vxl * vxl + vyl * vyl + vzl * vzl);
+    pl    = hydro_utilities::Get_Pressure_From_DE(El, El - E_kin, dgel, gamma);
+#else
+    pl = (El - 0.5 * dl * (vxl * vxl + vyl * vyl + vzl * vzl)) * (gamma - 1.0);
+#endif  // PRESSURE_DE
+    pl = fmax(pl, (Real)TINY_NUMBER);
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
       scalarl[i] = dscalarl[i] / dl;
     }
-    #endif
-    #ifdef DE
+#endif
+#ifdef DE
     gel = dgel / dl;
-    #endif
+#endif
     vxr = mxr / dr;
     vyr = myr / dr;
     vzr = mzr / dr;
-    #ifdef DE //PRESSURE_DE
-    E_kin = 0.5 * dr * ( vxr*vxr + vyr*vyr + vzr*vzr );
-    pr = hydro_utilities::Get_Pressure_From_DE( Er, Er - E_kin, dger, gamma );
-    #else
-    pr  = (Er - 0.5*dr*(vxr*vxr + vyr*vyr + vzr*vzr)) * (gamma - 1.0);
-    #endif //PRESSURE_DE
-    pr  = fmax(pr, (Real) TINY_NUMBER);
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
+#ifdef DE  // PRESSURE_DE
+    E_kin = 0.5 * dr * (vxr * vxr + vyr * vyr + vzr * vzr);
+    pr    = hydro_utilities::Get_Pressure_From_DE(Er, Er - E_kin, dger, gamma);
+#else
+    pr = (Er - 0.5 * dr * (vxr * vxr + vyr * vyr + vzr * vzr)) * (gamma - 1.0);
+#endif  // PRESSURE_DE
+    pr = fmax(pr, (Real)TINY_NUMBER);
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
       scalarr[i] = dscalarr[i] / dr;
     }
-    #endif
-    #ifdef DE
+#endif
+#ifdef DE
     ger = dger / dr;
-    #endif
+#endif
 
     // calculate the enthalpy in each cell
     Hl = (El + pl) / dl;
@@ -140,261 +148,253 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R
     // (see Stone et al., 2008, Eqn 65, or Toro 2009, 11.118)
     sqrtdl = sqrt(dl);
     sqrtdr = sqrt(dr);
-    vx = (sqrtdl*vxl + sqrtdr*vxr) / (sqrtdl + sqrtdr);
-    vy = (sqrtdl*vyl + sqrtdr*vyr) / (sqrtdl + sqrtdr);
-    vz = (sqrtdl*vzl + sqrtdr*vzr) / (sqrtdl + sqrtdr);
-    H  = (sqrtdl*Hl  + sqrtdr*Hr)  / (sqrtdl + sqrtdr);
-
+    vx     = (sqrtdl * vxl + sqrtdr * vxr) / (sqrtdl + sqrtdr);
+    vy     = (sqrtdl * vyl + sqrtdr * vyr) / (sqrtdl + sqrtdr);
+    vz     = (sqrtdl * vzl + sqrtdr * vzr) / (sqrtdl + sqrtdr);
+    H      = (sqrtdl * Hl + sqrtdr * Hr) / (sqrtdl + sqrtdr);
 
     // calculate the sound speed squared (Stone B2)
-    vsq = (vx*vx + vy*vy + vz*vz);
-    asq = g1*fmax((H - 0.5*vsq), TINY_NUMBER);
-    a = sqrt(asq);
+    vsq = (vx * vx + vy * vy + vz * vz);
+    asq = g1 * fmax((H - 0.5 * vsq), TINY_NUMBER);
+    a   = sqrt(asq);
 
-    // calculate the averaged eigenvectors of the Roe matrix (Stone Eqn B2, Toro 11.107)
+    // calculate the averaged eigenvectors of the Roe matrix (Stone Eqn B2,
+    // Toro 11.107)
     lambda_m = vx - a;
     lambda_0 = vx;
     lambda_p = vx + a;
 
     // calculate the fluxes for the left and right input states,
     // based on the average values in either cell
-    f_d_l = mxl;
-    f_mx_l = mxl*vxl + pl;
-    f_my_l = mxl*vyl;
-    f_mz_l = mxl*vzl;
-    f_E_l = (El + pl)*vxl;
-    #ifdef DE
-    f_ge_l = mxl*gel;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      f_scalar_l[i] = mxl*scalarl[i];
+    f_d_l  = mxl;
+    f_mx_l = mxl * vxl + pl;
+    f_my_l = mxl * vyl;
+    f_mz_l = mxl * vzl;
+    f_E_l  = (El + pl) * vxl;
+#ifdef DE
+    f_ge_l = mxl * gel;
+#endif
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      f_scalar_l[i] = mxl * scalarl[i];
     }
-    #endif
-
-    f_d_r = mxr;
-    f_mx_r = mxr*vxr + pr;
-    f_my_r = mxr*vyr;
-    f_mz_r = mxr*vzr;
-    f_E_r = (Er + pr)*vxr;
-    #ifdef DE
-    f_ge_r = mxr*ger;
-    #endif
-    #ifdef SCALAR
-    for (int i=0; i<NSCALARS; i++) {
-      f_scalar_r[i] = mxr*scalarr[i];
+#endif
+
+    f_d_r  = mxr;
+    f_mx_r = mxr * vxr + pr;
+    f_my_r = mxr * vyr;
+    f_mz_r = mxr * vzr;
+    f_E_r  = (Er + pr) * vxr;
+#ifdef DE
+    f_ge_r = mxr * ger;
+#endif
+#ifdef SCALAR
+    for (int i = 0; i < NSCALARS; i++) {
+      f_scalar_r[i] = mxr * scalarr[i];
     }
-    #endif
+#endif
 
     // return upwind flux if flow is supersonic
     if (lambda_m >= 0.0) {
-      dev_flux[          tid] = f_d_l;
-      dev_flux[o1*n_cells+tid] = f_mx_l;
-      dev_flux[o2*n_cells+tid] = f_my_l;
-      dev_flux[o3*n_cells+tid] = f_mz_l;
-      dev_flux[4*n_cells+tid] = f_E_l;
-      #ifdef SCALAR
-      for (int i=0; i<NSCALARS; i++) {
-        dev_flux[(5+i)*n_cells+tid] = f_scalar_l[i];
+      dev_flux[tid]                = f_d_l;
+      dev_flux[o1 * n_cells + tid] = f_mx_l;
+      dev_flux[o2 * n_cells + tid] = f_my_l;
+      dev_flux[o3 * n_cells + tid] = f_mz_l;
+      dev_flux[4 * n_cells + tid]  = f_E_l;
+#ifdef SCALAR
+      for (int i = 0; i < NSCALARS; i++) {
+        dev_flux[(5 + i) * n_cells + tid] = f_scalar_l[i];
       }
-      #endif
-      #ifdef DE
-      dev_flux[(n_fields-1)*n_cells+tid] = f_ge_l;
-      #endif
+#endif
+#ifdef DE
+      dev_flux[(n_fields - 1) * n_cells + tid] = f_ge_l;
+#endif
       return;
-    }
-    else if (lambda_p <= 0.0) {
-      dev_flux[          tid] = f_d_r;
-      dev_flux[o1*n_cells+tid] = f_mx_r;
-      dev_flux[o2*n_cells+tid] = f_my_r;
-      dev_flux[o3*n_cells+tid] = f_mz_r;
-      dev_flux[4*n_cells+tid] = f_E_r;
-      #ifdef SCALAR
-      for (int i=0; i<NSCALARS; i++) {
-        dev_flux[(5+i)*n_cells+tid] = f_scalar_r[i];
+    } else if (lambda_p <= 0.0) {
+      dev_flux[tid]                = f_d_r;
+      dev_flux[o1 * n_cells + tid] = f_mx_r;
+      dev_flux[o2 * n_cells + tid] = f_my_r;
+      dev_flux[o3 * n_cells + tid] = f_mz_r;
+      dev_flux[4 * n_cells + tid]  = f_E_r;
+#ifdef SCALAR
+      for (int i = 0; i < NSCALARS; i++) {
+        dev_flux[(5 + i) * n_cells + tid] = f_scalar_r[i];
       }
-      #endif
-      #ifdef DE
-      dev_flux[(n_fields-1)*n_cells+tid] = f_ge_r;
-      #endif
+#endif
+#ifdef DE
+      dev_flux[(n_fields - 1) * n_cells + tid] = f_ge_r;
+#endif
       return;
     }
     // otherwise calculate the Roe fluxes
     else {
-
-      // calculate the difference in conserved variables across the cell interface
-      // Stone Eqn 68
-      del_d  = dr  - dl;
+      // calculate the difference in conserved variables across the cell
+      // interface Stone Eqn 68
+      del_d  = dr - dl;
       del_mx = mxr - mxl;
       del_my = myr - myl;
       del_mz = mzr - mzl;
-      del_E  = Er  - El;
-
+      del_E  = Er - El;
 
       // evaluate the flux function (Stone Eqn 66 & 67, Toro Eqn 11.29)
 
-      Real Na = 0.5/asq;
+      Real Na    = 0.5 / asq;
       Real coeff = 0.0;
 
       // left eigenvector [0] * del_q
-      a0 = del_d*Na*(0.5*g1*vsq + vx*a) - del_mx*Na*(g1*vx+a) - del_my*Na*g1*vy - del_mz*Na*g1*vz + del_E*Na*g1;
+      a0 = del_d * Na * (0.5 * g1 * vsq + vx * a) - del_mx * Na * (g1 * vx + a) - del_my * Na * g1 * vy -
+           del_mz * Na * g1 * vz + del_E * Na * g1;
       coeff = a0 * fmax(fabs(lambda_m), etah);
       sum_0 += coeff;
-      sum_1 += coeff * (vx-a);
+      sum_1 += coeff * (vx - a);
       sum_2 += coeff * vy;
       sum_3 += coeff * vz;
-      sum_4 += coeff * (H - vx*a);
+      sum_4 += coeff * (H - vx * a);
       // left eigenvector [1] * del_q
-      a1 = -del_d*vy + del_my;
+      a1    = -del_d * vy + del_my;
       coeff = a1 * fmax(fabs(lambda_0), etah);
       sum_2 += coeff;
       sum_4 += coeff * vy;
       // left eigenvector [2] * del_q
-      a2 = -del_d*vz + del_mz;
+      a2    = -del_d * vz + del_mz;
       coeff = a2 * fmax(fabs(lambda_0), etah);
       sum_3 += coeff;
       sum_4 += coeff * vz;
       // left eigenvector [3] * del_q
-      a3 = del_d*(1.0 - Na*g1*vsq) + del_mx*g1*vx/asq + del_my*g1*vy/asq + del_mz*g1*vz/asq - del_E*g1/asq;
+      a3 = del_d * (1.0 - Na * g1 * vsq) + del_mx * g1 * vx / asq + del_my * g1 * vy / asq + del_mz * g1 * vz / asq -
+           del_E * g1 / asq;
       coeff = a3 * fmax(fabs(lambda_0), etah);
       sum_0 += coeff;
       sum_1 += coeff * vx;
       sum_2 += coeff * vy;
       sum_3 += coeff * vz;
-      sum_4 += coeff * 0.5*vsq;
+      sum_4 += coeff * 0.5 * vsq;
       // left eigenvector [4] * del_q
-      a4 = del_d*Na*(0.5*g1*vsq - vx*a) - del_mx*Na*(g1*vx-a) - del_my*Na*g1*vy - del_mz*Na*g1*vz + del_E*Na*g1;
+      a4 = del_d * Na * (0.5 * g1 * vsq - vx * a) - del_mx * Na * (g1 * vx - a) - del_my * Na * g1 * vy -
+           del_mz * Na * g1 * vz + del_E * Na * g1;
       coeff = a4 * fmax(fabs(lambda_p), etah);
       sum_0 += coeff;
       sum_1 += coeff * (vx + a);
       sum_2 += coeff * vy;
       sum_3 += coeff * vz;
-      sum_4 += coeff * (H + vx*a);
-
+      sum_4 += coeff * (H + vx * a);
 
       // if density or pressure is negative, compute the HLLE fluxes
       // test intermediate states
       test0 = dl + a0;
-      test1 = mxl + a0*(vx-a);
-      test2 = myl + a0*vy;
-      test3 = mzl + a0*vz;
-      test4 = El + a0*(H-vx*a);
+      test1 = mxl + a0 * (vx - a);
+      test2 = myl + a0 * vy;
+      test3 = mzl + a0 * vz;
+      test4 = El + a0 * (H - vx * a);
 
-      if(lambda_0 > lambda_m) {
+      if (lambda_0 > lambda_m) {
         if (test0 <= 0.0) {
-          hlle_flag=1;
+          hlle_flag = 1;
         }
-        if (test4 - 0.5*(test1*test1 + test2*test2 + test3*test3)/test0 < 0.0) {
-          hlle_flag=2;
+        if (test4 - 0.5 * (test1 * test1 + test2 * test2 + test3 * test3) / test0 < 0.0) {
+          hlle_flag = 2;
         }
       }
 
       test0 += a3 + a4;
-      test1 += a3*vx;
-      test2 += a1 + a3*vy;
-      test3 += a2 + a3*vz;
-      test4 += a1*vy + a2*vz + a3*0.5*vsq;
+      test1 += a3 * vx;
+      test2 += a1 + a3 * vy;
+      test3 += a2 + a3 * vz;
+      test4 += a1 * vy + a2 * vz + a3 * 0.5 * vsq;
 
-      if(lambda_p > lambda_0) {
+      if (lambda_p > lambda_0) {
         if (test0 <= 0.0) {
-          hlle_flag=1;
+          hlle_flag = 1;
         }
-        if (test4 - 0.5*(test1*test1 + test2*test2 + test3*test3)/test0 < 0.0) {
-          hlle_flag=2;
+        if (test4 - 0.5 * (test1 * test1 + test2 * test2 + test3 * test3) / test0 < 0.0) {
+          hlle_flag = 2;
         }
       }
 
-      // if pressure or density is negative, and we have not already returned the supersonic fluxes,
-      // return the HLLE fluxes
+      // if pressure or density is negative, and we have not already returned
+      // the supersonic fluxes, return the HLLE fluxes
       if (hlle_flag != 0) {
-
-        Real cfl, cfr, al, ar, bm, bp, tmp;
+        Real cfl, cfr, bm, bp, tmp;
 
         // compute max and fmin wave speeds
-        cfl = sqrt(gamma*pl/dl);  // sound speed in left state
-        cfr = sqrt(gamma*pr/dr);  // sound speed in right state
+        cfl = sqrt(gamma * pl / dl);  // sound speed in left state
+        cfr = sqrt(gamma * pr / dr);  // sound speed in right state
 
         // take max/fmin of Roe eigenvalues and left and right sound speeds
-        al = fmin(lambda_m, vxl - cfl);
-        ar = fmax(lambda_p, vxr + cfr);
-
-        bm = fmin(al, (Real) 0.0);
-        bp = fmax(ar, (Real) 0.0);
+        bm = fmin(fmin(lambda_m, vxl - cfl), (Real)0.0);
+        bp = fmax(fmax(lambda_p, vxr + cfr), (Real)0.0);
 
         // compute left and right fluxes
-        f_d_l = mxl - bm*dl;
-        f_d_r = mxr - bp*dr;
+        f_d_l = mxl - bm * dl;
+        f_d_r = mxr - bp * dr;
 
-        f_mx_l = mxl*(vxl - bm) + pl;
-        f_mx_r = mxr*(vxr - bp) + pr;
+        f_mx_l = mxl * (vxl - bm) + pl;
+        f_mx_r = mxr * (vxr - bp) + pr;
 
-        f_my_l = myl*(vxl - bm);
-        f_my_r = myr*(vxr - bp);
+        f_my_l = myl * (vxl - bm);
+        f_my_r = myr * (vxr - bp);
 
-        f_mz_l = mzl*(vxl - bm);
-        f_mz_r = mzr*(vxr - bp);
+        f_mz_l = mzl * (vxl - bm);
+        f_mz_r = mzr * (vxr - bp);
 
-        f_E_l = El*(vxl - bm) + pl*vxl;
-        f_E_r = Er*(vxr - bp) + pr*vxr;
+        f_E_l = El * (vxl - bm) + pl * vxl;
+        f_E_r = Er * (vxr - bp) + pr * vxr;
 
-        #ifdef DE
-        f_ge_l = dgel*(vxl - bm);
-        f_ge_r = dger*(vxr - bp);
-        #endif
+#ifdef DE
+        f_ge_l = dgel * (vxl - bm);
+        f_ge_r = dger * (vxr - bp);
+#endif
 
-        #ifdef SCALAR
-        for (int i=0; i<NSCALARS; i++) {
-          f_scalar_l[i] = dscalarl[i]*(vxl - bm);
-          f_scalar_r[i] = dscalarr[i]*(vxr - bp);
+#ifdef SCALAR
+        for (int i = 0; i < NSCALARS; i++) {
+          f_scalar_l[i] = dscalarl[i] * (vxl - bm);
+          f_scalar_r[i] = dscalarr[i] * (vxr - bp);
         }
-        #endif
+#endif
 
         // compute the HLLE flux at the interface
-        tmp = 0.5*(bp + bm)/(bp - bm);
-
-        dev_flux[          tid] = 0.5*(f_d_l  + f_d_r)  + (f_d_l  - f_d_r)*tmp;
-        dev_flux[o1*n_cells+tid] = 0.5*(f_mx_l + f_mx_r) + (f_mx_l - f_mx_r)*tmp;
-        dev_flux[o2*n_cells+tid] = 0.5*(f_my_l + f_my_r) + (f_my_l - f_my_r)*tmp;
-        dev_flux[o3*n_cells+tid] = 0.5*(f_mz_l + f_mz_r) + (f_mz_l - f_mz_r)*tmp;
-        dev_flux[4*n_cells+tid] = 0.5*(f_E_l  + f_E_r)  + (f_E_l  - f_E_r)*tmp;
-        #ifdef SCALAR
-        for (int i=0; i<NSCALARS; i++) {
-          dev_flux[(5+i)*n_cells+tid] = 0.5*(f_scalar_l[i] + f_scalar_r[i]) + (f_scalar_l[i] - f_scalar_r[i])*tmp;
+        tmp = 0.5 * (bp + bm) / (bp - bm);
+
+        dev_flux[tid]                = 0.5 * (f_d_l + f_d_r) + (f_d_l - f_d_r) * tmp;
+        dev_flux[o1 * n_cells + tid] = 0.5 * (f_mx_l + f_mx_r) + (f_mx_l - f_mx_r) * tmp;
+        dev_flux[o2 * n_cells + tid] = 0.5 * (f_my_l + f_my_r) + (f_my_l - f_my_r) * tmp;
+        dev_flux[o3 * n_cells + tid] = 0.5 * (f_mz_l + f_mz_r) + (f_mz_l - f_mz_r) * tmp;
+        dev_flux[4 * n_cells + tid]  = 0.5 * (f_E_l + f_E_r) + (f_E_l - f_E_r) * tmp;
+#ifdef SCALAR
+        for (int i = 0; i < NSCALARS; i++) {
+          dev_flux[(5 + i) * n_cells + tid] =
+              0.5 * (f_scalar_l[i] + f_scalar_r[i]) + (f_scalar_l[i] - f_scalar_r[i]) * tmp;
         }
-        #endif
-        #ifdef DE
-        dev_flux[(n_fields-1)*n_cells+tid] = 0.5*(f_ge_l + f_ge_r) + (f_ge_l - f_ge_r)*tmp;
-        #endif
+#endif
+#ifdef DE
+        dev_flux[(n_fields - 1) * n_cells + tid] = 0.5 * (f_ge_l + f_ge_r) + (f_ge_l - f_ge_r) * tmp;
+#endif
         return;
       }
       // otherwise return the roe fluxes
       else {
-        dev_flux[          tid] = 0.5*(f_d_l  + f_d_r  - sum_0);
-        dev_flux[o1*n_cells+tid] = 0.5*(f_mx_l + f_mx_r - sum_1);
-        dev_flux[o2*n_cells+tid] = 0.5*(f_my_l + f_my_r - sum_2);
-        dev_flux[o3*n_cells+tid] = 0.5*(f_mz_l + f_mz_r - sum_3);
-        dev_flux[4*n_cells+tid] = 0.5*(f_E_l  + f_E_r  - sum_4);
-        #ifdef SCALAR
-        for (int i=0; i<NSCALARS; i++) {
-          if (dev_flux[tid] >= 0.0)
-            dev_flux[(5+i)*n_cells+tid] = dev_flux[tid] * scalarl[i];
-          else
-            dev_flux[(5+i)*n_cells+tid] = dev_flux[tid] * scalarr[i];
+        dev_flux[tid]                = 0.5 * (f_d_l + f_d_r - sum_0);
+        dev_flux[o1 * n_cells + tid] = 0.5 * (f_mx_l + f_mx_r - sum_1);
+        dev_flux[o2 * n_cells + tid] = 0.5 * (f_my_l + f_my_r - sum_2);
+        dev_flux[o3 * n_cells + tid] = 0.5 * (f_mz_l + f_mz_r - sum_3);
+        dev_flux[4 * n_cells + tid]  = 0.5 * (f_E_l + f_E_r - sum_4);
+#ifdef SCALAR
+        for (int i = 0; i < NSCALARS; i++) {
+          if (dev_flux[tid] >= 0.0) {
+            dev_flux[(5 + i) * n_cells + tid] = dev_flux[tid] * scalarl[i];
+          } else {
+            dev_flux[(5 + i) * n_cells + tid] = dev_flux[tid] * scalarr[i];
+          }
         }
-        #endif
-        #ifdef DE
-        if (dev_flux[tid] >= 0.0)
-          dev_flux[(n_fields-1)*n_cells+tid] = dev_flux[tid] * gel;
-        else
-          dev_flux[(n_fields-1)*n_cells+tid] = dev_flux[tid] * ger;
-        #endif
+#endif
+#ifdef DE
+        if (dev_flux[tid] >= 0.0) {
+          dev_flux[(n_fields - 1) * n_cells + tid] = dev_flux[tid] * gel;
+        } else {
+          dev_flux[(n_fields - 1) * n_cells + tid] = dev_flux[tid] * ger;
+        }
+#endif
       }
-
     }
-
   }
-
 }
-
-
-#endif //CUDA
diff --git a/src/riemann_solvers/roe_cuda.h b/src/riemann_solvers/roe_cuda.h
index 3e7fcc772..bff592876 100644
--- a/src/riemann_solvers/roe_cuda.h
+++ b/src/riemann_solvers/roe_cuda.h
@@ -1,19 +1,16 @@
 /*! \file roe_cuda.h
  *  \brief Declarations of functions for the cuda roe riemann solver kernel. */
 
-#ifdef CUDA
-
 #ifndef ROE_CUDA_H
-#define Roe_CUDA_H
+#define ROE_CUDA_H
 
 #include "../global/global.h"
 
+/*! \fn Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real
+ * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, Real *dev_etah,
+ * int dir, int n_fields) \brief Roe Riemann solver based on the version
+ * described in Stone et al, 2008. */
+__global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny,
+                                          int nz, int n_ghost, Real gamma, int dir, int n_fields);
 
-/*! \fn Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, Real *dev_etah, int dir, int n_fields)
- *  \brief Roe Riemann solver based on the version described in Stone et al, 2008. */
-__global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields);
-
-
-
-#endif //ROE_CUDA_H
-#endif //CUDA
+#endif  // ROE_CUDA_H
diff --git a/src/system_tests/cooling_system_tests.cpp b/src/system_tests/cooling_system_tests.cpp
index 8b62ef092..71095151c 100644
--- a/src/system_tests/cooling_system_tests.cpp
+++ b/src/system_tests/cooling_system_tests.cpp
@@ -4,25 +4,18 @@
  *
  */
 
-
 // External Libraries and Headers
 #include <gtest/gtest.h>
-#include <cmath> // provides std:sin
+
+#include <cmath>  // provides std:sin
 
 // Local includes
 #include "../system_tests/system_tester.h"
 #include "../utils/testing_utilities.h"
 
-
-
-#ifndef PI
-#define PI 3.141592653589793
-#endif
-
 #define COOL_RHO 6.9498489284711
 
-TEST(tCOOLINGSYSTEMConstant5,
-     CorrectInputExpectCorrectOutput)
+TEST(tCOOLINGSYSTEMConstant5, CorrectInputExpectCorrectOutput)
 {
   // dt = 0.3
   // rho = COOL_RHO*1e5
@@ -31,61 +24,53 @@ TEST(tCOOLINGSYSTEMConstant5,
   /*
   double energy = 0.0014850544057189395;// Python
   */
-  double energy = 0.00148501098087863;// Cholla
-  systemTest::SystemTestRunner testObject(false, false, false);
+  double energy = 0.00148501098087863;  // Cholla
+  system_test::SystemTestRunner testObject(false, false, false);
   testObject.launchCholla();
   testObject.openHydroTestData();
 
-  testingUtilities::analyticConstant(testObject,"density",COOL_RHO*1e5);
-  testingUtilities::analyticConstant(testObject,"momentum_x",0.0);
-  testingUtilities::analyticConstant(testObject,"momentum_y",0.0);
-  testingUtilities::analyticConstant(testObject,"momentum_z",0.0);
-  testingUtilities::analyticConstant(testObject,"Energy",energy);
-
+  testing_utilities::analyticConstant(testObject, "density", COOL_RHO * 1e5);
+  testing_utilities::analyticConstant(testObject, "momentum_x", 0.0);
+  testing_utilities::analyticConstant(testObject, "momentum_y", 0.0);
+  testing_utilities::analyticConstant(testObject, "momentum_z", 0.0);
+  testing_utilities::analyticConstant(testObject, "Energy", energy);
 }
 
-
-TEST(tCOOLINGSYSTEMConstant7,
-     CorrectInputExpectCorrectOutput)
+TEST(tCOOLINGSYSTEMConstant7, CorrectInputExpectCorrectOutput)
 {
   // dt = 100
   // rho = COOL_RHO*1e5
   // pressure = 1e-1
   // T = 1e7
   // double energy = 0.14982743570299709; // Python
-  double energy = 0.14982745510047499; // Cholla
-  systemTest::SystemTestRunner testObject(false, false, false);
+  double energy = 0.14982745510047499;  // Cholla
+  system_test::SystemTestRunner testObject(false, false, false);
   testObject.launchCholla();
   testObject.openHydroTestData();
 
-  testingUtilities::analyticConstant(testObject,"density",COOL_RHO*1e5);
-  testingUtilities::analyticConstant(testObject,"momentum_x",0.0);
-  testingUtilities::analyticConstant(testObject,"momentum_y",0.0);
-  testingUtilities::analyticConstant(testObject,"momentum_z",0.0);
-  testingUtilities::analyticConstant(testObject,"Energy",energy);
-
+  testing_utilities::analyticConstant(testObject, "density", COOL_RHO * 1e5);
+  testing_utilities::analyticConstant(testObject, "momentum_x", 0.0);
+  testing_utilities::analyticConstant(testObject, "momentum_y", 0.0);
+  testing_utilities::analyticConstant(testObject, "momentum_z", 0.0);
+  testing_utilities::analyticConstant(testObject, "Energy", energy);
 }
 
-TEST(tCOOLINGSYSTEMConstant8,
-     CorrectInputExpectCorrectOutput)
+TEST(tCOOLINGSYSTEMConstant8, CorrectInputExpectCorrectOutput)
 {
   // dt = 90
   // rho = COOL_RHO*1e5
   // pressure = 1
   // T = 1e8
-  
+
   // double energy = 1.499669522009355; // Python
-  double energy = 1.4996695198095711; // Cholla
-  systemTest::SystemTestRunner testObject(false, false, false);
+  double energy = 1.4996695198095711;  // Cholla
+  system_test::SystemTestRunner testObject(false, false, false);
   testObject.launchCholla();
   testObject.openHydroTestData();
 
-  testingUtilities::analyticConstant(testObject,"density",COOL_RHO*1e5);
-  testingUtilities::analyticConstant(testObject,"momentum_x",0.0);
-  testingUtilities::analyticConstant(testObject,"momentum_y",0.0);
-  testingUtilities::analyticConstant(testObject,"momentum_z",0.0);
-  testingUtilities::analyticConstant(testObject,"Energy",energy);
-
-
+  testing_utilities::analyticConstant(testObject, "density", COOL_RHO * 1e5);
+  testing_utilities::analyticConstant(testObject, "momentum_x", 0.0);
+  testing_utilities::analyticConstant(testObject, "momentum_y", 0.0);
+  testing_utilities::analyticConstant(testObject, "momentum_z", 0.0);
+  testing_utilities::analyticConstant(testObject, "Energy", energy);
 }
-
diff --git a/src/system_tests/gravity_system_tests.cpp b/src/system_tests/gravity_system_tests.cpp
index 76cae4d7d..c2a59c40e 100644
--- a/src/system_tests/gravity_system_tests.cpp
+++ b/src/system_tests/gravity_system_tests.cpp
@@ -20,11 +20,10 @@
  *
  */
 /// @{
-TEST(tGRAVITYSYSTEMSphericalCollapse,
-     CorrectInputExpectCorrectOutput)
+TEST(tGRAVITYSYSTEMSphericalCollapse, CorrectInputExpectCorrectOutput)
 {
-    systemTest::SystemTestRunner collapseTest;
-    collapseTest.runTest();
+  system_test::SystemTestRunner collapseTest;
+  collapseTest.runTest();
 }
 /// @}
 // =============================================================================
diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp
index 72a6dc349..6cffe9c21 100644
--- a/src/system_tests/hydro_system_tests.cpp
+++ b/src/system_tests/hydro_system_tests.cpp
@@ -5,93 +5,306 @@
  *
  */
 
-
 // External Libraries and Headers
 #include <gtest/gtest.h>
-#include <cmath> // provides std:sin
+
+#include <cmath>  // provides std:sin
 
 // Local includes
+#include "../io/io.h"
 #include "../system_tests/system_tester.h"
 #include "../utils/testing_utilities.h"
 
-
-
-#ifndef PI
-#define PI 3.141592653589793
-#endif
-
-
 // =============================================================================
-// Test Suite: tHYDROSYSTEMSodShockTube
+// Test Suite: tHYDROtMHDSYSTEMSodShockTube
 // =============================================================================
 /*!
- * \defgroup tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput
+ * \defgroup
+ * tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput
  * \brief Test the Sod Shock tube initial conditions as a parameterized test
  * with varying numbers of MPI ranks
  *
  */
 /// @{
-class tHYDROSYSTEMSodShockTubeParameterizedMpi
-      :public
-      ::testing::TestWithParam<size_t>
+// NOLINTNEXTLINE(readability-identifier-naming)
+class tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi : public ::testing::TestWithParam<size_t>
 {
-protected:
-    systemTest::SystemTestRunner sodTest;
+ protected:
+  system_test::SystemTestRunner sodTest;
 };
 
-TEST_P(tHYDROSYSTEMSodShockTubeParameterizedMpi,
-       CorrectInputExpectCorrectOutput)
+TEST_P(tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOutput)
 {
-    sodTest.numMpiRanks = GetParam();
-    sodTest.runTest();
+#ifdef MHD
+  sodTest.setFixedEpsilon(1.0E-4);
+
+  // Don't test the gas energy fields
+  auto datasetNames = sodTest.getDataSetsToTest();
+  datasetNames.erase(std::remove(datasetNames.begin(), datasetNames.end(), "GasEnergy"), datasetNames.end());
+
+  // Set the magnetic fiducial datasets to zero
+  size_t const size = 64 * 64 * 65;
+  std::vector<double> const magVec(size, 0);
+
+  for (const auto *field : {"magnetic_x", "magnetic_y", "magnetic_z"}) {
+    sodTest.setFiducialData(field, magVec);
+    datasetNames.emplace_back(field);
+  }
+
+  sodTest.setDataSetsToTest(datasetNames);
+
+  double const maxAllowedL1Error = 7.0E-3;
+  double const maxAllowedError   = 4.6E-2;
+#else
+  double const maxAllowedL1Error = 9.4E-5;
+  double const maxAllowedError   = 6.4E-4;
+#endif  // MHD
+
+  sodTest.numMpiRanks = GetParam();
+  sodTest.runTest(true, maxAllowedL1Error, maxAllowedError);
 }
 
-INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput,
-                         tHYDROSYSTEMSodShockTubeParameterizedMpi,
+INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi,
                          ::testing::Values(1, 2, 4));
 /// @}
 // =============================================================================
 
-TEST(tHYDROSYSTEMConstant,
-     CorrectInputExpectCorrectOutput)
+TEST(tHYDROSYSTEMSodShockTube, OneDimensionalCorrectInputExpectCorrectOutput)
+{
+  system_test::SystemTestRunner sod_test;
+  sod_test.runTest();
+}
+
+TEST(tHYDROSYSTEMSodShockTube, TwoDimensionalCorrectInputExpectCorrectOutput)
 {
-  systemTest::SystemTestRunner testObject(false, false, false);
+  system_test::SystemTestRunner sod_test;
+  sod_test.runTest();
+}
+
+TEST(tHYDROtMHDSYSTEMConstant, CorrectInputExpectCorrectOutput)
+{
+  system_test::SystemTestRunner testObject(false, false, false);
 
   testObject.launchCholla();
 
   testObject.openHydroTestData();
 
-  testingUtilities::analyticConstant(testObject,"density",1.0);
-  testingUtilities::analyticConstant(testObject,"momentum_x",0.0);
-  testingUtilities::analyticConstant(testObject,"momentum_y",0.0);
-  testingUtilities::analyticConstant(testObject,"momentum_z",0.0);
-  testingUtilities::analyticConstant(testObject,"Energy",1.5e-5);
-
+  testing_utilities::analyticConstant(testObject, "density", 1.0);
+  testing_utilities::analyticConstant(testObject, "momentum_x", 0.0);
+  testing_utilities::analyticConstant(testObject, "momentum_y", 0.0);
+  testing_utilities::analyticConstant(testObject, "momentum_z", 0.0);
+  testing_utilities::analyticConstant(testObject, "Energy", 1.5e-5);
 }
 
-
-TEST(tHYDROSYSTEMSoundWave3D,
-     CorrectInputExpectCorrectOutput)
+TEST(tHYDROtMHDSYSTEMSoundWave3D, CorrectInputExpectCorrectOutput)
 {
-  double time = 0.05;
+  double time      = 0.05;
   double amplitude = 1e-5;
-  double dx = 1./64.;
-    
-  double real_kx = 2*PI;//kx of the physical problem
-  
-  double kx = real_kx * dx;
-  double speed = 1;//speed of wave is 1 since P = 0.6 and gamma = 1.666667
-  double phase = kx*0.5 - speed * time * real_kx; //kx*0.5 for half-cell offset
+  double dx        = 1. / 64.;
+
+  double real_kx = 2 * M_PI;  // kx of the physical problem
+
+  double kx        = real_kx * dx;
+  double speed     = 1;                                  // speed of wave is 1 since P = 0.6 and gamma = 1.666667
+  double phase     = kx * 0.5 - speed * time * real_kx;  // kx*0.5 for half-cell offset
   double tolerance = 1e-7;
 
-  systemTest::SystemTestRunner testObject(false, false, false);
+  system_test::SystemTestRunner testObject(false, false, false);
+
+#ifdef MHD
+  // Loosen correctness check to account for MHD only having PCM. This is
+  // about the error between PCM and PPMP in hydro
+  // Check Results. Values based on results in Gardiner & Stone 2008
+  #ifdef PCM
+  tolerance = 1e-6;
+  #elif defined(PLMC)
+  tolerance = 1.0E-7;
+  #elif defined(PPMC)
+  tolerance = 1.9E-9;
+  #endif  // PCM
+#endif    // MHD
 
   testObject.launchCholla();
 
   testObject.openHydroTestData();
 
-  testingUtilities::analyticSine(testObject,"density",1.0,amplitude,kx,0.0,0.0,phase,tolerance);
-  testingUtilities::analyticSine(testObject,"momentum_x",0.0,amplitude,kx,0.0,0.0,phase,tolerance);
-  //testingUtilities::analyticSine(testObject,"momentum_y",0.0,amplitude,kx,0.0,0.0,0.0,tolerance);
-  //testingUtilities::analyticSine(testObject,"momentum_z",0.0,amplitude,kx,0.0,0.0,0.0,tolerance);
+  ASSERT_NO_FATAL_FAILURE(
+      testing_utilities::analyticSine(testObject, "density", 1.0, amplitude, kx, 0.0, 0.0, phase, tolerance));
+  ASSERT_NO_FATAL_FAILURE(
+      testing_utilities::analyticSine(testObject, "momentum_x", 0.0, amplitude, kx, 0.0, 0.0, phase, tolerance));
+  // testing_utilities::analyticSine(testObject,"momentum_y",0.0,amplitude,kx,0.0,0.0,0.0,tolerance);
+  // testing_utilities::analyticSine(testObject,"momentum_z",0.0,amplitude,kx,0.0,0.0,0.0,tolerance);
+}
+
+// =============================================================================
+// Test Suite: tHYDROtMHDSYSTEMLinearWavesParameterizedMpi
+// =============================================================================
+/*!
+ * \defgroup tHYDROtMHDSYSTEMLinearWavesParameterizedMpi
+ * \brief Test the linear waves initial conditions as a parameterized test
+ * with varying numbers of MPI ranks.
+ *
+ */
+/// @{
+// NOLINTNEXTLINE(readability-identifier-naming)
+class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithParam<size_t>
+{
+ public:
+  tHYDROtMHDSYSTEMLinearWavesParameterizedMpi() : waveTest(false, true, false, false){};
+
+ protected:
+  system_test::SystemTestRunner waveTest;
+
+#ifdef PCM
+  double static constexpr allowedL1Error = 4E-7;  // Based on results in Gardiner & Stone 2008
+  double static constexpr allowedError   = 4E-7;
+#elif defined(PLMC)
+  double static constexpr allowedL1Error = 1E-7;  // Based on results in Gardiner & Stone 2008
+  double static constexpr allowedError   = 1E-7;
+#elif defined(PLMP)
+  double static constexpr allowedL1Error = 1E-7;  // Based on results in Gardiner & Stone 2008
+  double static constexpr allowedError   = 1E-7;
+#elif defined(PPMC)
+  double static constexpr allowedL1Error = 2.7E-8;  // Based on results in Gardiner & Stone 2008
+  double static constexpr allowedError   = 2.7E-8;
+#elif defined(PPMP)
+  double static constexpr allowedL1Error = 2.7E-8;  // Based on results in Gardiner & Stone 2008
+  double static constexpr allowedError   = 2.7E-8;
+#endif
+
+  void Set_Launch_Params(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX,
+                         double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ,
+                         double const &rEigenVec_E, double const &vx = 0.0)
+  {
+    // Constant for all tests
+    size_t const N      = 32;
+    double const domain = 0.5;
+    double const gamma  = 5. / 3.;
+    double const tOut   = 2 * domain / waveSpeed;
+
+    // Settings
+    waveTest.chollaLaunchParams.append(" nx=" + to_string_exact<double>(2 * N));
+    waveTest.chollaLaunchParams.append(" ny=" + to_string_exact<double>(N));
+    waveTest.chollaLaunchParams.append(" nz=" + to_string_exact<double>(N));
+    waveTest.chollaLaunchParams.append(" tout=" + to_string_exact<double>(tOut));
+    waveTest.chollaLaunchParams.append(" outstep=" + to_string_exact<double>(tOut));
+    waveTest.chollaLaunchParams.append(" init=Linear_Wave");
+    waveTest.chollaLaunchParams.append(" xmin=0.0");
+    waveTest.chollaLaunchParams.append(" ymin=0.0");
+    waveTest.chollaLaunchParams.append(" zmin=0.0");
+    waveTest.chollaLaunchParams.append(" xlen=" + to_string_exact<double>(2 * domain));
+    waveTest.chollaLaunchParams.append(" ylen=" + to_string_exact<double>(domain));
+    waveTest.chollaLaunchParams.append(" zlen=" + to_string_exact<double>(domain));
+    waveTest.chollaLaunchParams.append(" xl_bcnd=1");
+    waveTest.chollaLaunchParams.append(" xu_bcnd=1");
+    waveTest.chollaLaunchParams.append(" yl_bcnd=1");
+    waveTest.chollaLaunchParams.append(" yu_bcnd=1");
+    waveTest.chollaLaunchParams.append(" zl_bcnd=1");
+    waveTest.chollaLaunchParams.append(" zu_bcnd=1");
+    waveTest.chollaLaunchParams.append(" rho=1.0");
+    waveTest.chollaLaunchParams.append(" vx=" + to_string_exact<double>(vx));
+    waveTest.chollaLaunchParams.append(" vy=0");
+    waveTest.chollaLaunchParams.append(" vz=0");
+    waveTest.chollaLaunchParams.append(" P=" + to_string_exact<double>(1 / gamma));
+    waveTest.chollaLaunchParams.append(" Bx=0");
+    waveTest.chollaLaunchParams.append(" By=0");
+    waveTest.chollaLaunchParams.append(" Bz=0");
+    waveTest.chollaLaunchParams.append(" A='1e-6'");
+    waveTest.chollaLaunchParams.append(" gamma=" + to_string_exact<double>(gamma));
+    waveTest.chollaLaunchParams.append(" rEigenVec_rho=" + to_string_exact<double>(rEigenVec_rho));
+    waveTest.chollaLaunchParams.append(" rEigenVec_MomentumX=" + to_string_exact<double>(rEigenVec_MomentumX));
+    waveTest.chollaLaunchParams.append(" rEigenVec_MomentumY=" + to_string_exact<double>(rEigenVec_MomentumY));
+    waveTest.chollaLaunchParams.append(" rEigenVec_MomentumZ=" + to_string_exact<double>(rEigenVec_MomentumZ));
+    waveTest.chollaLaunchParams.append(" rEigenVec_E=" + to_string_exact<double>(rEigenVec_E));
+    waveTest.chollaLaunchParams.append(" rEigenVec_Bx=0");
+    waveTest.chollaLaunchParams.append(" rEigenVec_By=0");
+    waveTest.chollaLaunchParams.append(" rEigenVec_Bz=0");
+  }
+};
+
+// Sound Waves Moving Left and Right
+// =================================
+TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, SoundWaveRightMovingCorrectInputExpectCorrectOutput)
+{
+  // Specific to this test
+  double const waveSpeed = 1.;
+  int const numTimeSteps = 214;
+
+  double const rEigenVec_rho       = 1;
+  double const rEigenVec_MomentumX = 1;
+  double const rEigenVec_MomentumY = 1;
+  double const rEigenVec_MomentumZ = 1;
+  double const rEigenVec_E         = 1.5;
+
+  // Set the launch parameters
+  Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ,
+                    rEigenVec_E);
+
+  // Set the number of MPI ranks
+  waveTest.numMpiRanks = GetParam();
+
+  // Set the number of timesteps
+  waveTest.setFiducialNumTimeSteps(numTimeSteps);
+
+  // Check Results
+  waveTest.runL1ErrorTest(2 * allowedL1Error, allowedError);
+}
+
+TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, SoundWaveLeftMovingCorrectInputExpectCorrectOutput)
+{
+  // Specific to this test
+  double const waveSpeed = 1.;
+  int const numTimeSteps = 214;
+
+  double const rEigenVec_rho       = 1;
+  double const rEigenVec_MomentumX = -1;
+  double const rEigenVec_MomentumY = 1;
+  double const rEigenVec_MomentumZ = 1;
+  double const rEigenVec_E         = 1.5;
+
+  // Set the launch parameters
+  Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ,
+                    rEigenVec_E);
+
+  // Set the number of MPI ranks
+  waveTest.numMpiRanks = GetParam();
+
+  // Set the number of timesteps
+  waveTest.setFiducialNumTimeSteps(numTimeSteps);
+
+  // Check Results
+  waveTest.runL1ErrorTest(2 * allowedL1Error, allowedError);
+}
+
+// Contact Waves Moving Left and Right
+// ===================================
+TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, HydroContactWaveCorrectInputExpectCorrectOutput)
+{
+  // Specific to this test
+  double const waveSpeed = 1.0;
+  int const numTimeSteps = 427;
+
+  double const rEigenVec_rho       = 1;
+  double const rEigenVec_MomentumX = 1;
+  double const rEigenVec_MomentumY = 0;
+  double const rEigenVec_MomentumZ = 0;
+  double const rEigenVec_E         = 0.5;
+  double const velocityX           = waveSpeed;
+
+  // Set the launch parameters
+  Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ,
+                    rEigenVec_E, velocityX);
+
+  // Set the number of MPI ranks
+  waveTest.numMpiRanks = GetParam();
+
+  // Set the number of timesteps
+  waveTest.setFiducialNumTimeSteps(numTimeSteps);
+
+  // Check Results
+  waveTest.runL1ErrorTest(allowedL1Error, allowedError);
 }
+
+INSTANTIATE_TEST_SUITE_P(, tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, ::testing::Values(1));
+/// @}
+// =============================================================================
\ No newline at end of file
diff --git a/src/system_tests/input_files/blank_settings_file.txt b/src/system_tests/input_files/blank_settings_file.txt
new file mode 100644
index 000000000..e8fbd7e77
--- /dev/null
+++ b/src/system_tests/input_files/blank_settings_file.txt
@@ -0,0 +1,3 @@
+# This is blank file for system tests that are setting all the parameters
+# internally to point at. Without a blank file cholla will crash
+
diff --git a/src/system_tests/input_files/tHYDROSYSTEMSodShockTube_OneDimensionalCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tHYDROSYSTEMSodShockTube_OneDimensionalCorrectInputExpectCorrectOutput.txt
new file mode 100644
index 000000000..dd54ff082
--- /dev/null
+++ b/src/system_tests/input_files/tHYDROSYSTEMSodShockTube_OneDimensionalCorrectInputExpectCorrectOutput.txt
@@ -0,0 +1,56 @@
+#
+# Parameter File for 1D Sod Shock tube
+#
+
+################################################
+# number of grid cells in the x dimension
+nx=64
+# number of grid cells in the y dimension
+ny=1
+# number of grid cells in the z dimension
+nz=1
+# final output time
+tout=0.2
+# time interval for output
+outstep=0.2
+# name of initial conditions
+init=Riemann
+# domain properties
+xmin=0.0
+ymin=0.0
+zmin=0.0
+xlen=1.0
+ylen=1.0
+zlen=1.0
+# type of boundary conditions
+xl_bcnd=3
+xu_bcnd=3
+yl_bcnd=3
+yu_bcnd=3
+zl_bcnd=3
+zu_bcnd=3
+# path to output directory
+outdir=./
+
+#################################################
+# Parameters for 1D Riemann problems
+# density of left state
+rho_l=1.0
+# velocity of left state
+vx_l=0.0
+vy_l=0.0
+vz_l=0.0
+# pressure of left state
+P_l=1.0
+# density of right state
+rho_r=0.1
+# velocity of right state
+vx_r=0.0
+vy_r=0.0
+vz_r=0.0
+# pressure of right state
+P_r=0.1
+# location of initial discontinuity
+diaph=0.5
+# value of gamma
+gamma=1.4
diff --git a/src/system_tests/input_files/tHYDROSYSTEMSodShockTube_TwoDimensionalCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tHYDROSYSTEMSodShockTube_TwoDimensionalCorrectInputExpectCorrectOutput.txt
new file mode 100644
index 000000000..c89e179be
--- /dev/null
+++ b/src/system_tests/input_files/tHYDROSYSTEMSodShockTube_TwoDimensionalCorrectInputExpectCorrectOutput.txt
@@ -0,0 +1,56 @@
+#
+# Parameter File for 1D Sod Shock tube
+#
+
+################################################
+# number of grid cells in the x dimension
+nx=64
+# number of grid cells in the y dimension
+ny=64
+# number of grid cells in the z dimension
+nz=1
+# final output time
+tout=0.2
+# time interval for output
+outstep=0.2
+# name of initial conditions
+init=Riemann
+# domain properties
+xmin=0.0
+ymin=0.0
+zmin=0.0
+xlen=1.0
+ylen=1.0
+zlen=1.0
+# type of boundary conditions
+xl_bcnd=3
+xu_bcnd=3
+yl_bcnd=3
+yu_bcnd=3
+zl_bcnd=3
+zu_bcnd=3
+# path to output directory
+outdir=./
+
+#################################################
+# Parameters for 1D Riemann problems
+# density of left state
+rho_l=1.0
+# velocity of left state
+vx_l=0.0
+vy_l=0.0
+vz_l=0.0
+# pressure of left state
+P_l=1.0
+# density of right state
+rho_r=0.1
+# velocity of right state
+vx_r=0.0
+vy_r=0.0
+vz_r=0.0
+# pressure of right state
+P_r=0.1
+# location of initial discontinuity
+diaph=0.5
+# value of gamma
+gamma=1.4
diff --git a/src/system_tests/input_files/tHYDROtMHDReadGridHdf5_RestartSlowWaveExpectCorrectOutput.txt b/src/system_tests/input_files/tHYDROtMHDReadGridHdf5_RestartSlowWaveExpectCorrectOutput.txt
new file mode 100644
index 000000000..38686bfbd
--- /dev/null
+++ b/src/system_tests/input_files/tHYDROtMHDReadGridHdf5_RestartSlowWaveExpectCorrectOutput.txt
@@ -0,0 +1,72 @@
+#
+# Parameter File for MHD slow magnetosonic wave
+# See [this blog post](https://robertcaddy.com/posts/Classes-and-bugfixing-6/)
+# for details on each wave
+# The right eigenvector for this wave is:
+# (1/(6*sqrt(5))) * [12, +/-6, +/-8*sqrt(2), +/-4, 0, -4*sqrt(2), -2, 9]
+# The terms with two sign options: use the left one for right moving waves and
+# the right one for left moving waves
+#
+
+################################################
+# number of grid cells in the x dimension
+nx=64
+# number of grid cells in the y dimension
+ny=32
+# number of grid cells in the z dimension
+nz=32
+# final output time
+tout=2.0
+# time interval for output
+outstep=2.0
+# name of initial conditions
+init=Linear_Wave
+# domain properties
+xmin=0.0
+ymin=0.0
+zmin=0.0
+xlen=1.0
+ylen=0.5
+zlen=0.5
+# type of boundary conditions
+xl_bcnd=1
+xu_bcnd=1
+yl_bcnd=1
+yu_bcnd=1
+zl_bcnd=1
+zu_bcnd=1
+# path to output directory
+outdir=./
+
+#################################################
+# Parameters for linear wave problems
+# initial density
+rho=1.0
+# velocity in the x direction
+vx=0
+# velocity in the y direction
+vy=0
+# velocity in the z direction
+vz=0
+# initial pressure
+P=0.6
+# magnetic field in the x direction
+Bx=1
+# magnetic field in the y direction
+By=1.5
+# magnetic field in the z direction
+Bz=0
+# amplitude of perturbing oscillations
+A=1e-6
+# value of gamma
+gamma=1.666666666666667
+# The right eigenvectors to set the wave properly
+rEigenVec_rho=0.8944271909999159
+rEigenVec_MomentumX=0.4472135954999579
+rEigenVec_MomentumY=0.8944271909999159
+rEigenVec_MomentumZ=0.0
+rEigenVec_Bx=0.0
+rEigenVec_By=-0.4472135954999579
+rEigenVec_Bz=0.0
+rEigenVec_E=0.6708203932499369
+
diff --git a/src/system_tests/input_files/tHYDROSYSTEMConstant_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tHYDROtMHDSYSTEMConstant_CorrectInputExpectCorrectOutput.txt
similarity index 100%
rename from src/system_tests/input_files/tHYDROSYSTEMConstant_CorrectInputExpectCorrectOutput.txt
rename to src/system_tests/input_files/tHYDROtMHDSYSTEMConstant_CorrectInputExpectCorrectOutput.txt
diff --git a/src/system_tests/input_files/tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt
similarity index 95%
rename from src/system_tests/input_files/tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt
rename to src/system_tests/input_files/tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt
index 6fb66732b..efcd912fd 100644
--- a/src/system_tests/input_files/tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt
+++ b/src/system_tests/input_files/tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt
@@ -25,10 +25,10 @@ zlen=1.0
 # type of boundary conditions
 xl_bcnd=3
 xu_bcnd=3
-yl_bcnd=0
-yu_bcnd=0
-zl_bcnd=0
-zu_bcnd=0
+yl_bcnd=3
+yu_bcnd=3
+zl_bcnd=3
+zu_bcnd=3
 # path to output directory
 outdir=./
 
@@ -54,4 +54,3 @@ P_r=0.1
 diaph=0.5
 # value of gamma
 gamma=1.4
-
diff --git a/src/system_tests/input_files/tHYDROSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tHYDROtMHDSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt
similarity index 70%
rename from src/system_tests/input_files/tHYDROSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt
rename to src/system_tests/input_files/tHYDROtMHDSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt
index f1c23ea6e..efdedaceb 100644
--- a/src/system_tests/input_files/tHYDROSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt
+++ b/src/system_tests/input_files/tHYDROtMHDSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt
@@ -14,7 +14,7 @@ tout=0.05
 # time interval for output
 outstep=0.05
 # name of initial conditions
-init=Sound_Wave
+init=Linear_Wave
 # domain properties
 xmin=0.0
 ymin=0.0
@@ -34,18 +34,31 @@ outdir=./
 
 #################################################
 # Parameters for linear wave problems
-# initial density 
+# initial density
 rho=1.0
-# velocity in the x direction 
+# velocity in the x direction
 vx=0
 # velocity in the y direction
 vy=0
 # velocity in the z direction
 vz=0
-# initial pressure 
+# initial pressure
 P=0.6
 # amplitude of perturbing oscillations
 A=1e-5
 # value of gamma
 gamma=1.666666666666667
+# The right eigenvectors to set the wave properly
+rEigenVec_rho=1
+rEigenVec_MomentumX=1
+rEigenVec_MomentumY=1
+rEigenVec_MomentumZ=1
+rEigenVec_E=1.5
 
+# Set the magnetic field quantities to zero
+Bx=0
+By=0
+Bz=0
+rEigenVec_Bx=0
+rEigenVec_By=0
+rEigenVec_Bz=0
\ No newline at end of file
diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_AdvectingFieldLoopCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_AdvectingFieldLoopCorrectInputExpectCorrectOutput.txt
new file mode 100644
index 000000000..d6a733d3c
--- /dev/null
+++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_AdvectingFieldLoopCorrectInputExpectCorrectOutput.txt
@@ -0,0 +1,55 @@
+#
+# Parameter File for an MHD Advecting Field Loop as defined in
+# [Gardiner & Stone 2008](https://ui.adsabs.harvard.edu/abs/2008JCoPh.227.4123G/abstract)
+#
+
+################################################
+# number of grid cells in the x dimension
+nx=32
+# number of grid cells in the y dimension
+ny=32
+# number of grid cells in the z dimension
+nz=64
+# final output time
+tout=2.0
+# time interval for output
+outstep=2.0
+# name of initial conditions
+init=Advecting_Field_Loop
+# domain properties
+xmin=-0.5
+ymin=-0.5
+zmin=-1.0
+xlen=1.0
+ylen=1.0
+zlen=2.0
+# type of boundary conditions
+xl_bcnd=1
+xu_bcnd=1
+yl_bcnd=1
+yu_bcnd=1
+zl_bcnd=1
+zu_bcnd=1
+# path to output directory
+outdir=./
+
+#################################################
+# Parameters for linear wave problems
+# initial density
+rho=1.0
+# velocity in the x direction
+vx=1.0
+# velocity in the y direction
+vy=1.0
+# velocity in the z direction
+vz=2.0
+# initial pressure
+P=1.0
+# amplitude of the loop/magnetic field background value
+A=0.001
+# Radius of the Loop
+radius=0.3
+
+# value of gamma
+gamma=1.666666666666667
+
diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_BrioAndWuShockTubeCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_BrioAndWuShockTubeCorrectInputExpectCorrectOutput.txt
new file mode 100644
index 000000000..514dd3359
--- /dev/null
+++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_BrioAndWuShockTubeCorrectInputExpectCorrectOutput.txt
@@ -0,0 +1,72 @@
+#
+# Parameter File for 3D Brio & Wu MHD shock tube
+# Citation: Brio & Wu 1988 "An Upwind Differencing Scheme for the Equations of
+# Ideal Magnetohydrodynamics"
+#
+
+################################################
+# number of grid cells in the x dimension
+nx=64
+# number of grid cells in the y dimension
+ny=64
+# number of grid cells in the z dimension
+nz=64
+# final output time
+tout=0.1
+# time interval for output
+outstep=0.1
+# name of initial conditions
+init=Riemann
+
+# domain properties
+xmin=0.0
+ymin=0.0
+zmin=0.0
+xlen=1.0
+ylen=1.0
+zlen=1.0
+
+# type of boundary conditions
+xl_bcnd=3
+xu_bcnd=3
+yl_bcnd=3
+yu_bcnd=3
+zl_bcnd=3
+zu_bcnd=3
+
+# path to output directory
+outdir=./
+
+#################################################
+# Parameters for 1D Riemann problems
+# density of left state
+rho_l=1.0
+# velocity of left state
+vx_l=0
+vy_l=0
+vz_l=0
+# pressure of left state
+P_l=1.0
+# Magnetic field of the left state
+Bx_l=0.75
+By_l=1.0
+Bz_l=0.0
+
+# density of right state
+rho_r=0.128
+# velocity of right state
+vx_r=0
+vy_r=0
+vz_r=0
+# pressure of right state
+P_r=0.1
+# Magnetic field of the right state
+Bx_r=0.75
+By_r=-1.0
+Bz_r=0.0
+
+# location of initial discontinuity
+diaph=0.5
+# value of gamma
+gamma=2.0
+
diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_ConstantWithMagneticFieldCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_ConstantWithMagneticFieldCorrectInputExpectCorrectOutput.txt
new file mode 100644
index 000000000..eabea0e60
--- /dev/null
+++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_ConstantWithMagneticFieldCorrectInputExpectCorrectOutput.txt
@@ -0,0 +1,50 @@
+#
+# Parameter File for 3D box filled with gas
+#
+
+################################################
+# number of grid cells in the x dimension
+nx=16
+# number of grid cells in the y dimension
+ny=16
+# number of grid cells in the z dimension
+nz=16
+# final output time
+tout=100000.0
+# time interval for output
+outstep=100000.0
+# name of initial conditions
+init=Constant
+# domain properties
+xmin=0.0
+ymin=0.0
+zmin=0.0
+xlen=1.0
+ylen=1.0
+zlen=1.0
+# type of boundary conditions
+xl_bcnd=1
+xu_bcnd=1
+yl_bcnd=1
+yu_bcnd=1
+zl_bcnd=1
+zu_bcnd=1
+# path to output directory
+outdir=./
+
+#################################################
+# density
+rho=1e4
+# velocity
+vx=0
+vy=0
+vz=0
+# pressure
+P=1.380658e-5
+# Magnetic Field
+Bx=1.0e-5
+By=2.0e-5
+Bz=3.0e-5
+# value of gamma
+gamma=1.666666667
+
diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_ConstantWithZeroMagneticFieldCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_ConstantWithZeroMagneticFieldCorrectInputExpectCorrectOutput.txt
new file mode 100644
index 000000000..4f52b7cd6
--- /dev/null
+++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_ConstantWithZeroMagneticFieldCorrectInputExpectCorrectOutput.txt
@@ -0,0 +1,50 @@
+#
+# Parameter File for 3D box filled with gas
+#
+
+################################################
+# number of grid cells in the x dimension
+nx=16
+# number of grid cells in the y dimension
+ny=16
+# number of grid cells in the z dimension
+nz=16
+# final output time
+tout=100000.0
+# time interval for output
+outstep=100000.0
+# name of initial conditions
+init=Constant
+# domain properties
+xmin=0.0
+ymin=0.0
+zmin=0.0
+xlen=1.0
+ylen=1.0
+zlen=1.0
+# type of boundary conditions
+xl_bcnd=1
+xu_bcnd=1
+yl_bcnd=1
+yu_bcnd=1
+zl_bcnd=1
+zu_bcnd=1
+# path to output directory
+outdir=./
+
+#################################################
+# density
+rho=1e4
+# velocity
+vx=0
+vy=0
+vz=0
+# pressure
+P=1.380658e-5
+# Magnetic Field
+Bx=0.0
+By=0.0
+Bz=0.0
+# value of gamma
+gamma=1.666666667
+
diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_DaiAndWoodwardShockTubeCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_DaiAndWoodwardShockTubeCorrectInputExpectCorrectOutput.txt
new file mode 100644
index 000000000..538984951
--- /dev/null
+++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_DaiAndWoodwardShockTubeCorrectInputExpectCorrectOutput.txt
@@ -0,0 +1,73 @@
+#
+# Parameter File for 3D Dai & Woodward MHD shock tube
+# Citation: Dai & Woodward 1998 "On The Diverrgence-Free Condition and
+# Conservation Laws in Numerical Simulations for Supersonic Magnetohydrodynamic
+# Flows"
+#
+
+################################################
+# number of grid cells in the x dimension
+nx=64
+# number of grid cells in the y dimension
+ny=64
+# number of grid cells in the z dimension
+nz=64
+# final output time
+tout=0.2
+# time interval for output
+outstep=0.2
+# name of initial conditions
+init=Riemann
+
+# domain properties
+xmin=0.0
+ymin=0.0
+zmin=0.0
+xlen=1.0
+ylen=1.0
+zlen=1.0
+
+# type of boundary conditions
+xl_bcnd=3
+xu_bcnd=3
+yl_bcnd=3
+yu_bcnd=3
+zl_bcnd=3
+zu_bcnd=3
+
+# path to output directory
+outdir=./
+
+#################################################
+# Parameters for 1D Riemann problems
+# density of left state
+rho_l=1.08
+# velocity of left state
+vx_l=1.2
+vy_l=0.01
+vz_l=0.5
+# pressure of left state
+P_l=0.95
+# Magnetic field of the left state
+Bx_l=0.5641895835477563
+By_l=1.0155412503859613
+Bz_l=0.5641895835477563
+
+# density of right state
+rho_r=1.0
+# velocity of right state
+vx_r=0.0
+vy_r=0.0
+vz_r=0.0
+# pressure of right state
+P_r=1.0
+# Magnetic field of the right state
+Bx_r=0.5641895835477563
+By_r=1.1283791670955126
+Bz_r=0.5641895835477563
+
+# location of initial discontinuity
+diaph=0.5
+# value of gamma
+gamma=1.6666666666666667
+
diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_EinfeldtStrongRarefactionCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_EinfeldtStrongRarefactionCorrectInputExpectCorrectOutput.txt
new file mode 100644
index 000000000..3e4747551
--- /dev/null
+++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_EinfeldtStrongRarefactionCorrectInputExpectCorrectOutput.txt
@@ -0,0 +1,71 @@
+#
+# Parameter File for 3D Einfeldt Strong Rarefaction MHD test
+# Citation: Einfeldt et al. 1991 "On Godunov-Type Methods near Low Densities"
+#
+
+################################################
+# number of grid cells in the x dimension
+nx=32
+# number of grid cells in the y dimension
+ny=32
+# number of grid cells in the z dimension
+nz=32
+# final output time
+tout=0.16
+# time interval for output
+outstep=0.16
+# name of initial conditions
+init=Riemann
+
+# domain properties
+xmin=0.0
+ymin=0.0
+zmin=0.0
+xlen=1.0
+ylen=1.0
+zlen=1.0
+
+# type of boundary conditions
+xl_bcnd=3
+xu_bcnd=3
+yl_bcnd=3
+yu_bcnd=3
+zl_bcnd=3
+zu_bcnd=3
+
+# path to output directory
+outdir=./
+
+#################################################
+# Parameters for 1D Riemann problems
+# density of left state
+rho_l=1.0
+# velocity of left state
+vx_l=-2.0
+vy_l=0.0
+vz_l=0.0
+# pressure of left state
+P_l=0.45
+# Magnetic field of the left state
+Bx_l=0.0
+By_l=0.5
+Bz_l=0.0
+
+# density of right state
+rho_r=1.0
+# velocity of right state
+vx_r=2.0
+vy_r=0.0
+vz_r=0.0
+# pressure of right state
+P_r=0.45
+# Magnetic field of the right state
+Bx_r=0.0
+By_r=0.5
+Bz_r=0.0
+
+# location of initial discontinuity
+diaph=0.5
+# value of gamma
+gamma=1.4
+
diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_MhdBlastWaveCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_MhdBlastWaveCorrectInputExpectCorrectOutput.txt
new file mode 100644
index 000000000..77ef94b72
--- /dev/null
+++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_MhdBlastWaveCorrectInputExpectCorrectOutput.txt
@@ -0,0 +1,61 @@
+#
+# Parameter File for the MHD Blast wavelength
+# See [Stone & Gardiner 2009](https://ui.adsabs.harvard.edu/abs/2009NewA...14..139S/abstract) for details.
+#
+
+################################################
+# number of grid cells in the x dimension
+nx=50
+# number of grid cells in the y dimension
+ny=100
+# number of grid cells in the z dimension
+nz=50
+# final output time
+tout=0.2
+# time interval for output
+outstep=0.2
+# name of initial conditions
+init=MHD_Spherical_Blast
+# domain properties
+xmin=-0.5
+ymin=-0.75
+zmin=-0.5
+xlen=1.0
+ylen=1.5
+zlen=1.0
+# type of boundary conditions
+xl_bcnd=1
+xu_bcnd=1
+yl_bcnd=1
+yu_bcnd=1
+zl_bcnd=1
+zu_bcnd=1
+# path to output directory
+outdir=./
+
+#################################################
+# Parameters for MHD Blast Wave problem
+
+# initial density
+rho=1.0
+# velocity in the x direction
+vx=0.0
+# velocity in the y direction
+vy=0.0
+# velocity in the z direction
+vz=0.0
+# initial pressure outside the blast zone
+P=0.1
+# initial pressure inside the blast zone. Note that the paper says this should be 100, that is a typo
+P_blast=10.0
+# The radius of the blast zone
+radius=0.1
+# magnetic field in the x direction. Equal to 1/sqrt(2)
+Bx=0.70710678118654746
+# magnetic field in the y direction. Equal to 1/sqrt(2)
+By=0.70710678118654746
+# magnetic field in the z direction
+Bz=0.0
+
+# value of gamma
+gamma=1.666666666666667
diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_OrszagTangVortexCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_OrszagTangVortexCorrectInputExpectCorrectOutput.txt
new file mode 100644
index 000000000..332e26eb2
--- /dev/null
+++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_OrszagTangVortexCorrectInputExpectCorrectOutput.txt
@@ -0,0 +1,42 @@
+#
+# Parameter File for the Orszag-Tang Vortex
+# See [Gardiner & Stone 2008](https://arxiv.org/abs/0712.2634)
+#
+
+################################################
+# number of grid cells in the x dimension
+nx=64
+# number of grid cells in the y dimension
+ny=64
+# number of grid cells in the z dimension
+nz=64
+# final output time
+tout=0.5
+# time interval for output
+outstep=0.5
+# name of initial conditions
+init=Orszag_Tang_Vortex
+# domain properties
+xmin=0.0
+ymin=0.0
+zmin=0.0
+xlen=1.0
+ylen=1.0
+zlen=1.0
+# type of boundary conditions
+xl_bcnd=1
+xu_bcnd=1
+yl_bcnd=1
+yu_bcnd=1
+zl_bcnd=1
+zu_bcnd=1
+# path to output directory
+outdir=./
+
+#################################################
+# Parameters for Orszag-Tang Vortex. This problem is defined for a specific set
+# of initial conditions which have been hard coded into the initial conditions
+# function. The only thing that needs set here is the adiabatic index
+
+# value of gamma
+gamma=1.666666666666667
diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones1aShockTubeCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones1aShockTubeCorrectInputExpectCorrectOutput.txt
new file mode 100644
index 000000000..a03aef938
--- /dev/null
+++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones1aShockTubeCorrectInputExpectCorrectOutput.txt
@@ -0,0 +1,74 @@
+#
+# Parameter File for 3D Ryu & Jones MHD shock tube 1a.
+# Citation: Ryu & Jones 1995 "Numerical Magnetohydrodynamics in Astrophysics:
+# Algorithms and Tests for One-Dimensional Flow"
+#
+# Note: There are many shock tubes in this paper. This settings file is
+# specifically for shock tube 1a
+#
+
+################################################
+# number of grid cells in the x dimension
+nx=64
+# number of grid cells in the y dimension
+ny=64
+# number of grid cells in the z dimension
+nz=64
+# final output time
+tout=0.08
+# time interval for output
+outstep=0.08
+# name of initial conditions
+init=Riemann
+
+# domain properties
+xmin=0.0
+ymin=0.0
+zmin=0.0
+xlen=1.0
+ylen=1.0
+zlen=1.0
+
+# type of boundary conditions
+xl_bcnd=3
+xu_bcnd=3
+yl_bcnd=3
+yu_bcnd=3
+zl_bcnd=3
+zu_bcnd=3
+
+# path to output directory
+outdir=./
+
+#################################################
+# Parameters for 1D Riemann problems
+# density of left state
+rho_l=1.0
+# velocity of left state
+vx_l=10.0
+vy_l=0.0
+vz_l=0.0
+# pressure of left state
+P_l=20.0
+# Magnetic field of the left state
+Bx_l=1.4104739588693909
+By_l=1.4104739588693909
+Bz_l=0.0
+
+# density of right state
+rho_r=1.0
+# velocity of right state
+vx_r=-10.0
+vy_r=0.0
+vz_r=0.0
+# pressure of right state
+P_r=1.0
+# Magnetic field of the right state
+Bx_r=1.4104739588693909
+By_r=1.4104739588693909
+Bz_r=0.0
+
+# location of initial discontinuity
+diaph=0.5
+# value of gamma
+gamma=1.6666666666666667
diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones4dShockTubeCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones4dShockTubeCorrectInputExpectCorrectOutput.txt
new file mode 100644
index 000000000..6596c2b01
--- /dev/null
+++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones4dShockTubeCorrectInputExpectCorrectOutput.txt
@@ -0,0 +1,74 @@
+#
+# Parameter File for 3D Ryu & Jones MHD shock tube 4d.
+# Citation: Ryu & Jones 1995 "Numerical Magnetohydrodynamics in Astrophysics:
+# Algorithms and Tests for One-Dimensional Flow"
+#
+# Note: There are many shock tubes in this paper. This settings file is
+# specifically for shock tube 4d
+#
+
+################################################
+# number of grid cells in the x dimension
+nx=64
+# number of grid cells in the y dimension
+ny=64
+# number of grid cells in the z dimension
+nz=64
+# final output time
+tout=0.16
+# time interval for output
+outstep=0.16
+# name of initial conditions
+init=Riemann
+
+# domain properties
+xmin=0.0
+ymin=0.0
+zmin=0.0
+xlen=1.0
+ylen=1.0
+zlen=1.0
+
+# type of boundary conditions
+xl_bcnd=3
+xu_bcnd=3
+yl_bcnd=3
+yu_bcnd=3
+zl_bcnd=3
+zu_bcnd=3
+
+# path to output directory
+outdir=./
+
+#################################################
+# Parameters for 1D Riemann problems
+# density of left state
+rho_l=1.0
+# velocity of left state
+vx_l=0.0
+vy_l=0.0
+vz_l=0.0
+# pressure of left state
+P_l=1.0
+# Magnetic field of the left state
+Bx_l=0.7
+By_l=0.0
+Bz_l=0.0
+
+# density of right state
+rho_r=0.3
+# velocity of right state
+vx_r=0.0
+vy_r=0.0
+vz_r=1.0
+# pressure of right state
+P_r=0.2
+# Magnetic field of the right state
+Bx_r=0.7
+By_r=1.0
+Bz_r=0.0
+
+# location of initial discontinuity
+diaph=0.5
+# value of gamma
+gamma=1.6666666666666667
diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp
new file mode 100644
index 000000000..a14caa9a1
--- /dev/null
+++ b/src/system_tests/mhd_system_tests.cpp
@@ -0,0 +1,934 @@
+/*!
+ * \file mhd_system_tests.cpp
+ * \author Robert 'Bob' Caddy (rvc@pitt.edu)
+ * \brief Contains all the system tests for the MHD build type
+ *
+ */
+
+// STL includes
+#include <cmath>
+
+// External Libraries and Headers
+#include <gtest/gtest.h>
+
+// Local includes
+#include "../io/io.h"
+#include "../system_tests/system_tester.h"
+#include "../utils/testing_utilities.h"
+
+// =============================================================================
+// Test Suite: tMHDSYSTEMLinearWavesParameterizedAngle
+// =============================================================================
+/*!
+ * \defgroup tMHDSYSTEMLinearWavesParameterizedAngle
+ * \brief Test the linear waves initial conditions as a parameterized test
+ * with varying angles. Details in Gardiner & Stone 2008
+ *
+ */
+/// @{
+// NOLINTNEXTLINE(readability-identifier-naming)
+class tMHDSYSTEMLinearWavesParameterizedAngle : public ::testing::TestWithParam<std::tuple<double, double, double, int>>
+{
+ public:
+  tMHDSYSTEMLinearWavesParameterizedAngle() : waveTest(false, true, false, false){};
+
+ protected:
+  system_test::SystemTestRunner waveTest;
+  inline static std::unordered_map<std::string, double> high_res_l2norms;
+
+  void Set_Launch_Params(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX,
+                         double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ,
+                         double const &rEigenVec_E, double const &rEigenVec_Bx, double const &rEigenVec_By,
+                         double const &rEigenVec_Bz, double const &pitch, double const &yaw, double const &domain,
+                         int const &domain_direction, double const &vx = 0.0, size_t const &N = 32)
+  {
+    // Constant for all tests
+    double const gamma = 5. / 3.;
+    double const tOut  = 2 * domain / waveSpeed;
+
+    // Define vector values
+    double x_len = domain, y_len = domain, z_len = domain;
+    int nx = N, ny = N, nz = N;
+    double vx_rot = vx, vy_rot = 0, vz_rot = 0;
+    double Bx_rot = 1, By_rot = 1.5, Bz_rot = 0;
+
+    double rEigenVec_Bx_rot = rEigenVec_Bx;
+    double rEigenVec_By_rot = rEigenVec_By;
+    double rEigenVec_Bz_rot = rEigenVec_Bz;
+
+    double rEigenVec_MomentumX_rot = rEigenVec_MomentumX;
+    double rEigenVec_MomentumY_rot = rEigenVec_MomentumY;
+    double rEigenVec_MomentumZ_rot = rEigenVec_MomentumZ;
+
+    switch (domain_direction) {
+      case 1:
+        x_len *= 2;
+        nx *= 2;
+        break;
+      case 2:  // swap X and Y
+        y_len *= 2;
+        ny *= 2;
+        break;
+      case 3:  // swap X and Z
+        z_len *= 2;
+        nz *= 2;
+        break;
+      default:
+        throw std::invalid_argument("Invalid value of domain_direction given to Set_Launch_Params");
+        break;
+    }
+
+    // Settings
+    waveTest.chollaLaunchParams.append(" nx=" + to_string_exact<int>(nx));
+    waveTest.chollaLaunchParams.append(" ny=" + to_string_exact<int>(ny));
+    waveTest.chollaLaunchParams.append(" nz=" + to_string_exact<int>(nz));
+    waveTest.chollaLaunchParams.append(" tout=" + to_string_exact<double>(tOut));
+    waveTest.chollaLaunchParams.append(" outstep=" + to_string_exact<double>(tOut));
+    waveTest.chollaLaunchParams.append(" init=Linear_Wave");
+    waveTest.chollaLaunchParams.append(" xmin=0.0");
+    waveTest.chollaLaunchParams.append(" ymin=0.0");
+    waveTest.chollaLaunchParams.append(" zmin=0.0");
+    waveTest.chollaLaunchParams.append(" xlen=" + to_string_exact<double>(x_len));
+    waveTest.chollaLaunchParams.append(" ylen=" + to_string_exact<double>(y_len));
+    waveTest.chollaLaunchParams.append(" zlen=" + to_string_exact<double>(z_len));
+    waveTest.chollaLaunchParams.append(" xl_bcnd=1");
+    waveTest.chollaLaunchParams.append(" xu_bcnd=1");
+    waveTest.chollaLaunchParams.append(" yl_bcnd=1");
+    waveTest.chollaLaunchParams.append(" yu_bcnd=1");
+    waveTest.chollaLaunchParams.append(" zl_bcnd=1");
+    waveTest.chollaLaunchParams.append(" zu_bcnd=1");
+    waveTest.chollaLaunchParams.append(" rho=1.0");
+    waveTest.chollaLaunchParams.append(" vx=" + to_string_exact<double>(vx_rot));
+    waveTest.chollaLaunchParams.append(" vy=" + to_string_exact<double>(vy_rot));
+    waveTest.chollaLaunchParams.append(" vz=" + to_string_exact<double>(vz_rot));
+    waveTest.chollaLaunchParams.append(" P=" + to_string_exact<double>(1 / gamma));
+    waveTest.chollaLaunchParams.append(" Bx=" + to_string_exact<double>(Bx_rot));
+    waveTest.chollaLaunchParams.append(" By=" + to_string_exact<double>(By_rot));
+    waveTest.chollaLaunchParams.append(" Bz=" + to_string_exact<double>(Bz_rot));
+    waveTest.chollaLaunchParams.append(" A='1e-6'");
+    waveTest.chollaLaunchParams.append(" gamma=" + to_string_exact<double>(gamma));
+    waveTest.chollaLaunchParams.append(" rEigenVec_rho=" + to_string_exact<double>(rEigenVec_rho));
+    waveTest.chollaLaunchParams.append(" rEigenVec_MomentumX=" + to_string_exact<double>(rEigenVec_MomentumX_rot));
+    waveTest.chollaLaunchParams.append(" rEigenVec_MomentumY=" + to_string_exact<double>(rEigenVec_MomentumY_rot));
+    waveTest.chollaLaunchParams.append(" rEigenVec_MomentumZ=" + to_string_exact<double>(rEigenVec_MomentumZ_rot));
+    waveTest.chollaLaunchParams.append(" rEigenVec_E=" + to_string_exact<double>(rEigenVec_E));
+    waveTest.chollaLaunchParams.append(" rEigenVec_Bx=" + to_string_exact<double>(rEigenVec_Bx_rot));
+    waveTest.chollaLaunchParams.append(" rEigenVec_By=" + to_string_exact<double>(rEigenVec_By_rot));
+    waveTest.chollaLaunchParams.append(" rEigenVec_Bz=" + to_string_exact<double>(rEigenVec_Bz_rot));
+    waveTest.chollaLaunchParams.append(" pitch=" + to_string_exact<double>(pitch));
+    waveTest.chollaLaunchParams.append(" yaw=" + to_string_exact<double>(yaw));
+  }
+};
+
+// Fast Magnetosonic Waves Moving Left and Right
+// =============================================
+TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveRightMovingCorrectInputExpectCorrectOutput)
+{
+  // Specific to this test
+  double const waveSpeed              = 2.;
+  std::vector<int> const numTimeSteps = {214, 204, 220};
+
+  double const prefix              = 1. / (2 * std::sqrt(5));
+  double const rEigenVec_rho       = prefix * 2;
+  double const rEigenVec_MomentumX = prefix * 4;
+  double const rEigenVec_MomentumY = prefix * -2;  // + for left wave
+  double const rEigenVec_MomentumZ = prefix * 0;
+  double const rEigenVec_Bx        = prefix * 0;
+  double const rEigenVec_By        = prefix * 4;
+  double const rEigenVec_Bz        = prefix * 0;
+  double const rEigenVec_E         = prefix * 9;
+
+  // Get the test parameters
+  auto [pitch, yaw, domain, domain_direction] = GetParam();
+
+  // Set the launch parameters
+  Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ,
+                    rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction);
+
+  // Set the number of timesteps
+  waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]);
+
+// Check Results. Values based on results in Gardiner & Stone 2008
+#ifdef PCM
+  waveTest.runL1ErrorTest(4.2E-7, 5.4E-7);
+#elif defined(PLMC)
+  waveTest.runL1ErrorTest(6.5E-8, 6.5E-8);
+#elif defined(PPMC)
+  waveTest.runL1ErrorTest(6.11E-8, 5.5E-8);
+#endif  // PCM
+
+  high_res_l2norms["fast_" + std::to_string(domain_direction)] = waveTest.getL2Norm();
+}
+
+TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput)
+{
+  // Specific to this test
+  double const waveSpeed              = 2.;
+  std::vector<int> const numTimeSteps = {214, 204, 220};
+
+  double const prefix              = 1. / (2 * std::sqrt(5));
+  double const rEigenVec_rho       = prefix * 2;
+  double const rEigenVec_MomentumX = prefix * -4;
+  double const rEigenVec_MomentumY = prefix * 2;
+  double const rEigenVec_MomentumZ = prefix * 0;
+  double const rEigenVec_Bx        = prefix * 0;
+  double const rEigenVec_By        = prefix * 4;
+  double const rEigenVec_Bz        = prefix * 0;
+  double const rEigenVec_E         = prefix * 9;
+
+  // Get the test parameters
+  auto [pitch, yaw, domain, domain_direction] = GetParam();
+
+  // Set the launch parameters
+  Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ,
+                    rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction);
+
+  // Set the number of timesteps
+  waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]);
+
+// Check Results. Values based on results in Gardiner & Stone 2008
+#ifdef PCM
+  waveTest.runL1ErrorTest(4.2E-7, 5.4E-7);
+#elif defined(PLMC)
+  waveTest.runL1ErrorTest(6.5E-8, 6.5E-8);
+#elif defined(PPMC)
+  waveTest.runL1ErrorTest(6.1E-8, 5.5E-8);
+#endif  // PCM
+}
+
+// Slow Magnetosonic Waves Moving Left and Right
+// =============================================
+TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveRightMovingCorrectInputExpectCorrectOutput)
+{
+  // Specific to this test
+  double const waveSpeed              = 0.5;
+  std::vector<int> const numTimeSteps = {854, 813, 880};
+
+  double const prefix              = 1. / (2 * std::sqrt(5));
+  double const rEigenVec_rho       = prefix * 4;
+  double const rEigenVec_MomentumX = prefix * 2;
+  double const rEigenVec_MomentumY = prefix * 4;
+  double const rEigenVec_MomentumZ = prefix * 0;
+  double const rEigenVec_Bx        = prefix * 0;
+  double const rEigenVec_By        = prefix * -2;
+  double const rEigenVec_Bz        = prefix * 0;
+  double const rEigenVec_E         = prefix * 3;
+
+  // Get the test parameters
+  auto [pitch, yaw, domain, domain_direction] = GetParam();
+
+  // Set the launch parameters
+  Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ,
+                    rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction);
+
+  // Set the number of timesteps
+  waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]);
+
+  // Check Results. Values based on results in Gardiner & Stone 2008
+#ifdef PCM
+  waveTest.runL1ErrorTest(4.E-7, 4.E-7);
+#elif defined(PLMC)
+  waveTest.runL1ErrorTest(2.0E-8, 2.75E-8);
+#elif defined(PPMC)
+  waveTest.runL1ErrorTest(1.45E-9, 1.3E-9);
+#endif  // PCM
+
+  high_res_l2norms["slow_" + std::to_string(domain_direction)] = waveTest.getL2Norm();
+}
+
+TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput)
+{
+  // Specific to this test
+  double const waveSpeed              = 0.5;
+  std::vector<int> const numTimeSteps = {854, 813, 880};
+
+  double const prefix              = 1. / (2 * std::sqrt(5));
+  double const rEigenVec_rho       = prefix * 4;
+  double const rEigenVec_MomentumX = prefix * -2;
+  double const rEigenVec_MomentumY = prefix * -4;
+  double const rEigenVec_MomentumZ = prefix * 0;
+  double const rEigenVec_Bx        = prefix * 0;
+  double const rEigenVec_By        = prefix * -2;
+  double const rEigenVec_Bz        = prefix * 0;
+  double const rEigenVec_E         = prefix * 3;
+
+  // Get the test parameters
+  auto [pitch, yaw, domain, domain_direction] = GetParam();
+
+  // Set the launch parameters
+  Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ,
+                    rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction);
+
+  // Set the number of timesteps
+  waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]);
+
+  // Check Results. Values based on results in Gardiner & Stone 2008
+#ifdef PCM
+  waveTest.runL1ErrorTest(4.E-7, 4.E-7);
+#elif defined(PLMC)
+  waveTest.runL1ErrorTest(2.0E-8, 2.75E-8);
+#elif defined(PPMC)
+  waveTest.runL1ErrorTest(1.45E-9, 1.3E-9);
+#endif  // PCM
+}
+
+// Alfven Waves Moving Left and Right
+// =============================================
+TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveRightMovingCorrectInputExpectCorrectOutput)
+{
+  // Specific to this test
+  double const waveSpeed              = 1.0;
+  std::vector<int> const numTimeSteps = {427, 407, 440};
+
+  double const rEigenVec_rho       = 0;
+  double const rEigenVec_MomentumX = 0;
+  double const rEigenVec_MomentumY = 0;
+  double const rEigenVec_MomentumZ = -1;
+  double const rEigenVec_Bx        = 0;
+  double const rEigenVec_By        = 0;
+  double const rEigenVec_Bz        = 1;
+  double const rEigenVec_E         = 0;
+
+  // Get the test parameters
+  auto [pitch, yaw, domain, domain_direction] = GetParam();
+
+  // Set the launch parameters
+  Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ,
+                    rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction);
+
+  // Set the number of timesteps
+  waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]);
+
+  // Check Results. Values based on results in Gardiner & Stone 2008
+#ifdef PCM
+  waveTest.runL1ErrorTest(4.E-7, 4.E-7);
+#elif defined(PLMC)
+  waveTest.runL1ErrorTest(3.0E-8, 3.0E-8);
+#elif defined(PPMC)
+  waveTest.runL1ErrorTest(1.95e-09, 2.16e-09);
+#endif  // PCM
+
+  high_res_l2norms["alfven_" + std::to_string(domain_direction)] = waveTest.getL2Norm();
+}
+
+TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveLeftMovingCorrectInputExpectCorrectOutput)
+{
+  // Specific to this test
+  double const waveSpeed              = 1.0;
+  std::vector<int> const numTimeSteps = {427, 407, 440};
+
+  double const rEigenVec_rho       = 0;
+  double const rEigenVec_MomentumX = 0;
+  double const rEigenVec_MomentumY = 0;
+  double const rEigenVec_MomentumZ = 1;
+  double const rEigenVec_Bx        = 0;
+  double const rEigenVec_By        = 0;
+  double const rEigenVec_Bz        = 1;
+  double const rEigenVec_E         = 0;
+
+  // Get the test parameters
+  auto [pitch, yaw, domain, domain_direction] = GetParam();
+
+  // Set the launch parameters
+  Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ,
+                    rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction);
+
+  // Set the number of timesteps
+  waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]);
+
+  // Check Results. Values based on results in Gardiner & Stone 2008
+#ifdef PCM
+  waveTest.runL1ErrorTest(4.E-7, 4.E-7);
+#elif defined(PLMC)
+  waveTest.runL1ErrorTest(3.0E-8, 3.0E-8);
+#elif defined(PPMC)
+  waveTest.runL1ErrorTest(1.95e-09, 2.16e-09);
+#endif  // PCM
+}
+
+// Contact Wave Moving Right
+// ===================================
+TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveCorrectInputExpectCorrectOutput)
+{
+  // Specific to this test
+  double const waveSpeed              = 1.0;
+  std::vector<int> const numTimeSteps = {641, 620, 654};
+
+  double const rEigenVec_rho       = 1;
+  double const rEigenVec_MomentumX = 1;
+  double const rEigenVec_MomentumY = 0;
+  double const rEigenVec_MomentumZ = 0;
+  double const rEigenVec_Bx        = 0;
+  double const rEigenVec_By        = 0;
+  double const rEigenVec_Bz        = 0;
+  double const rEigenVec_E         = 0.5;
+  double const velocityX           = waveSpeed;
+
+  // Get the test parameters
+  auto [pitch, yaw, domain, domain_direction] = GetParam();
+
+  // Set the launch parameters
+  Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ,
+                    rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction,
+                    velocityX);
+
+  // Set the number of timesteps
+  waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]);
+
+// Check Results
+// Check Results. Values based on results in Gardiner & Stone 2008
+#ifdef PCM
+  waveTest.runL1ErrorTest(5.4E-7, 5.4E-7);
+#elif defined(PLMC)
+  waveTest.runL1ErrorTest(3.0E-8, 3.0E-8);
+#elif defined(PPMC)
+  waveTest.runL1ErrorTest(1.41e-09, 1.5E-09);
+#endif  // PCM
+
+  high_res_l2norms["contact_" + std::to_string(domain_direction)] = waveTest.getL2Norm();
+}
+
+TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveExpectSecondOrderConvergence)
+{
+  // Get the test parameters
+  auto [pitch, yaw, domain, domain_direction] = GetParam();
+
+  // Specific to this test
+  double const waveSpeed              = 2.;
+  std::vector<int> const numTimeSteps = {107, 102, 110};
+
+  double const prefix              = 1. / (2 * std::sqrt(5));
+  double const rEigenVec_rho       = prefix * 2;
+  double const rEigenVec_MomentumX = prefix * 4;
+  double const rEigenVec_MomentumY = prefix * -2;
+  double const rEigenVec_MomentumZ = prefix * 0;
+  double const rEigenVec_Bx        = prefix * 0;
+  double const rEigenVec_By        = prefix * 4;
+  double const rEigenVec_Bz        = prefix * 0;
+  double const rEigenVec_E         = prefix * 9;
+
+  // Set the launch parameters
+  Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ,
+                    rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, 0.0,
+                    16);
+
+  // Set the number of timesteps
+  waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]);
+
+  // Run the wave
+  waveTest.runL1ErrorTest(7.0E-8, 1.5E-7);
+
+  // Check the scaling
+  double const low_res_l2norm = waveTest.getL2Norm();
+  testing_utilities::Check_Results(4.0, low_res_l2norm / high_res_l2norms["fast_" + std::to_string(domain_direction)],
+                                   "", 0.2);
+}
+
+TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveExpectSecondOrderConvergence)
+{
+  // Get the test parameters
+  auto [pitch, yaw, domain, domain_direction] = GetParam();
+
+  // Specific to this test
+  double const waveSpeed              = 0.5;
+  std::vector<int> const numTimeSteps = {427, 407, 440};
+
+  double const prefix              = 1. / (2 * std::sqrt(5));
+  double const rEigenVec_rho       = prefix * 4;
+  double const rEigenVec_MomentumX = prefix * 2;
+  double const rEigenVec_MomentumY = prefix * 4;
+  double const rEigenVec_MomentumZ = prefix * 0;
+  double const rEigenVec_Bx        = prefix * 0;
+  double const rEigenVec_By        = prefix * -2;
+  double const rEigenVec_Bz        = prefix * 0;
+  double const rEigenVec_E         = prefix * 3;
+
+  // Set the launch parameters
+  Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ,
+                    rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, 0.0,
+                    16);
+
+  // Set the number of timesteps
+  waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]);
+
+  // Run the wave
+  waveTest.runL1ErrorTest(5.4E-8, 8.0E-8);
+
+  // Check the scaling
+  double const low_res_l2norm = waveTest.getL2Norm();
+  testing_utilities::Check_Results(4.0, low_res_l2norm / high_res_l2norms["slow_" + std::to_string(domain_direction)],
+                                   "", 0.2);
+}
+
+TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveExpectSecondOrderConvergence)
+{
+  // Get the test parameters
+  auto [pitch, yaw, domain, domain_direction] = GetParam();
+
+  // Specific to this test
+  double const waveSpeed              = 1.0;
+  std::vector<int> const numTimeSteps = {214, 204, 220};
+
+  double const rEigenVec_rho       = 0;
+  double const rEigenVec_MomentumX = 0;
+  double const rEigenVec_MomentumY = 0;
+  double const rEigenVec_MomentumZ = -1;
+  double const rEigenVec_Bx        = 0;
+  double const rEigenVec_By        = 0;
+  double const rEigenVec_Bz        = 1;
+  double const rEigenVec_E         = 0;
+
+  // Set the launch parameters
+  Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ,
+                    rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, 0.0,
+                    16);
+
+  // Set the number of timesteps
+  waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]);
+
+  // Run the wave
+  waveTest.runL1ErrorTest(4.5E-8, 8.0E-8);
+
+  // Check the scaling
+  double const low_res_l2norm = waveTest.getL2Norm();
+  testing_utilities::Check_Results(4.0, low_res_l2norm / high_res_l2norms["alfven_" + std::to_string(domain_direction)],
+                                   "", 0.2);
+}
+
+TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveExpectSecondOrderConvergence)
+{
+  // Get the test parameters
+  auto [pitch, yaw, domain, domain_direction] = GetParam();
+
+  // Specific to this test
+  double const waveSpeed              = 1.0;
+  std::vector<int> const numTimeSteps = {321, 310, 327};
+
+  double const rEigenVec_rho       = 1;
+  double const rEigenVec_MomentumX = 1;
+  double const rEigenVec_MomentumY = 0;
+  double const rEigenVec_MomentumZ = 0;
+  double const rEigenVec_Bx        = 0;
+  double const rEigenVec_By        = 0;
+  double const rEigenVec_Bz        = 0;
+  double const rEigenVec_E         = 0.5;
+  double const velocityX           = waveSpeed;
+
+  // Set the launch parameters
+  Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ,
+                    rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction,
+                    velocityX, 16);
+
+  // Set the number of timesteps
+  waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]);
+
+  // Run the wave
+  waveTest.runL1ErrorTest(5.0E-8, 8.0E-8);
+
+  // Check the scaling
+  double const low_res_l2norm = waveTest.getL2Norm();
+  testing_utilities::Check_Results(
+      4.0, low_res_l2norm / high_res_l2norms["contact_" + std::to_string(domain_direction)], "", 0.2);
+}
+
+INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMLinearWavesParameterizedAngle,
+                         ::testing::Values(std::make_tuple(0.0 * M_PI, 0.0 * M_PI, 0.5, 1),
+                                           std::make_tuple(0.0 * M_PI, 0.5 * M_PI, 0.5, 2),
+                                           std::make_tuple(0.5 * M_PI, 0.0 * M_PI, 0.5, 3)
+                                           // std::make_tuple(std::asin(2./3.),
+                                           // std::asin(2./std::sqrt(5.)), 1.5, 1)
+                                           ));
+/// @}
+// =============================================================================
+
+// =============================================================================
+// Test Suite: tMHDSYSTEMLinearWavesParameterizedMpi
+// =============================================================================
+/*!
+ * \defgroup tMHDSYSTEMLinearWavesParameterizedMpi
+ * \brief Test the linear waves initial conditions as a parameterized test
+ * with varying numbers of MPI ranks. Details in Gardiner & Stone 2008
+ *
+ */
+/// @{
+// NOLINTNEXTLINE(readability-identifier-naming)
+class tMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithParam<int>
+{
+ public:
+  tMHDSYSTEMLinearWavesParameterizedMpi() : waveTest(false, true, false, false){};
+
+ protected:
+  system_test::SystemTestRunner waveTest;
+
+  void Set_Launch_Params(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX,
+                         double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ,
+                         double const &rEigenVec_E, double const &rEigenVec_Bx, double const &rEigenVec_By,
+                         double const &rEigenVec_Bz)
+  {
+    // Constant for all tests
+    size_t const N      = 32;
+    double const gamma  = 5. / 3.;
+    double const domain = 0.5;
+    double const tOut   = 2 * domain / waveSpeed;
+
+    // Settings
+    waveTest.chollaLaunchParams.append(" nx=" + to_string_exact<int>(2 * N));
+    waveTest.chollaLaunchParams.append(" ny=" + to_string_exact<int>(N));
+    waveTest.chollaLaunchParams.append(" nz=" + to_string_exact<int>(N));
+    waveTest.chollaLaunchParams.append(" tout=" + to_string_exact<double>(tOut));
+    waveTest.chollaLaunchParams.append(" outstep=" + to_string_exact<double>(tOut));
+    waveTest.chollaLaunchParams.append(" init=Linear_Wave");
+    waveTest.chollaLaunchParams.append(" xmin=0.0");
+    waveTest.chollaLaunchParams.append(" ymin=0.0");
+    waveTest.chollaLaunchParams.append(" zmin=0.0");
+    waveTest.chollaLaunchParams.append(" xlen=" + to_string_exact<double>(2 * domain));
+    waveTest.chollaLaunchParams.append(" ylen=" + to_string_exact<double>(domain));
+    waveTest.chollaLaunchParams.append(" zlen=" + to_string_exact<double>(domain));
+    waveTest.chollaLaunchParams.append(" xl_bcnd=1");
+    waveTest.chollaLaunchParams.append(" xu_bcnd=1");
+    waveTest.chollaLaunchParams.append(" yl_bcnd=1");
+    waveTest.chollaLaunchParams.append(" yu_bcnd=1");
+    waveTest.chollaLaunchParams.append(" zl_bcnd=1");
+    waveTest.chollaLaunchParams.append(" zu_bcnd=1");
+    waveTest.chollaLaunchParams.append(" rho=1.0");
+    waveTest.chollaLaunchParams.append(" vx=0");
+    waveTest.chollaLaunchParams.append(" vy=0");
+    waveTest.chollaLaunchParams.append(" vz=0");
+    waveTest.chollaLaunchParams.append(" P=" + to_string_exact<double>(1 / gamma));
+    waveTest.chollaLaunchParams.append(" Bx=1");
+    waveTest.chollaLaunchParams.append(" By=1.5");
+    waveTest.chollaLaunchParams.append(" Bz=0");
+    waveTest.chollaLaunchParams.append(" A='1e-6'");
+    waveTest.chollaLaunchParams.append(" gamma=" + to_string_exact<double>(gamma));
+    waveTest.chollaLaunchParams.append(" rEigenVec_rho=" + to_string_exact<double>(rEigenVec_rho));
+    waveTest.chollaLaunchParams.append(" rEigenVec_MomentumX=" + to_string_exact<double>(rEigenVec_MomentumX));
+    waveTest.chollaLaunchParams.append(" rEigenVec_MomentumY=" + to_string_exact<double>(rEigenVec_MomentumY));
+    waveTest.chollaLaunchParams.append(" rEigenVec_MomentumZ=" + to_string_exact<double>(rEigenVec_MomentumZ));
+    waveTest.chollaLaunchParams.append(" rEigenVec_E=" + to_string_exact<double>(rEigenVec_E));
+    waveTest.chollaLaunchParams.append(" rEigenVec_Bx=" + to_string_exact<double>(rEigenVec_Bx));
+    waveTest.chollaLaunchParams.append(" rEigenVec_By=" + to_string_exact<double>(rEigenVec_By));
+    waveTest.chollaLaunchParams.append(" rEigenVec_Bz=" + to_string_exact<double>(rEigenVec_Bz));
+  }
+};
+
+INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMLinearWavesParameterizedMpi, ::testing::Values(1, 2, 4));
+
+// Slow Magnetosonic Waves Moving Left and Right
+// =============================================
+TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, SlowMagnetosonicWaveRightMovingCorrectInputExpectCorrectOutput)
+{
+  // Specific to this test
+  double const waveSpeed = 0.5;
+  int const numTimeSteps = 854;
+
+  double const prefix              = 1. / (2 * std::sqrt(5));
+  double const rEigenVec_rho       = prefix * 4;
+  double const rEigenVec_MomentumX = prefix * 2;
+  double const rEigenVec_MomentumY = prefix * 4;
+  double const rEigenVec_MomentumZ = prefix * 0;
+  double const rEigenVec_Bx        = prefix * 0;
+  double const rEigenVec_By        = prefix * -2;
+  double const rEigenVec_Bz        = prefix * 0;
+  double const rEigenVec_E         = prefix * 3;
+
+  // Get the test parameters
+  waveTest.numMpiRanks = GetParam();
+
+  // Set the launch parameters
+  Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ,
+                    rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz);
+
+  // Set the number of timesteps
+  waveTest.setFiducialNumTimeSteps(numTimeSteps);
+
+  // Check Results. Values based on results in Gardiner & Stone 2008
+#ifdef PCM
+  waveTest.runL1ErrorTest(4.E-7, 4.E-7);
+#elif defined(PLMC)
+  waveTest.runL1ErrorTest(2.0E-8, 2.75E-8);
+#elif defined(PPMC)
+  waveTest.runL1ErrorTest(1.4E-9, 1.3E-9);
+#endif  // PCM
+}
+
+TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, SlowMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput)
+{
+  // Specific to this test
+  double const waveSpeed = 0.5;
+  int const numTimeSteps = 854;
+
+  double const prefix              = 1. / (2 * std::sqrt(5));
+  double const rEigenVec_rho       = prefix * 4;
+  double const rEigenVec_MomentumX = prefix * -2;
+  double const rEigenVec_MomentumY = prefix * -4;
+  double const rEigenVec_MomentumZ = prefix * 0;
+  double const rEigenVec_Bx        = prefix * 0;
+  double const rEigenVec_By        = prefix * -2;
+  double const rEigenVec_Bz        = prefix * 0;
+  double const rEigenVec_E         = prefix * 3;
+
+  // Get the test parameters
+  waveTest.numMpiRanks = GetParam();
+
+  // Set the launch parameters
+  Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ,
+                    rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz);
+
+  // Set the number of timesteps
+  waveTest.setFiducialNumTimeSteps(numTimeSteps);
+
+  // Check Results. Values based on results in Gardiner & Stone 2008
+#ifdef PCM
+  waveTest.runL1ErrorTest(4.E-7, 4.E-7);
+#elif defined(PLMC)
+  waveTest.runL1ErrorTest(2.0E-8, 2.8E-8);
+#elif defined(PPMC)
+  waveTest.runL1ErrorTest(1.4E-9, 1.3E-9);
+#endif  // PCM
+}
+
+/// @}
+// =============================================================================
+
+// =============================================================================
+// Test Suite: tMHDSYSTEMParameterizedMpi
+// =============================================================================
+/*!
+ * \defgroup tMHDSYSTEMParameterizedMpi
+ * \brief Test initial conditions as a parameterized test with varying numbers of MPI ranks
+ *
+ */
+/// @{
+// NOLINTNEXTLINE(readability-identifier-naming)
+class tMHDSYSTEMParameterizedMpi : public ::testing::TestWithParam<size_t>
+{
+ protected:
+  system_test::SystemTestRunner test_runner;
+};
+INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMParameterizedMpi, ::testing::Values(1, 2, 4));
+
+/// Test constant state with all magnetic fields set to zero
+TEST_P(tMHDSYSTEMParameterizedMpi, ConstantWithZeroMagneticFieldCorrectInputExpectCorrectOutput)
+{
+  test_runner.numMpiRanks = GetParam();
+  test_runner.runTest();
+}
+
+/// Test constant state with all magnetic fields set to one
+TEST_P(tMHDSYSTEMParameterizedMpi, ConstantWithMagneticFieldCorrectInputExpectCorrectOutput)
+{
+  test_runner.numMpiRanks = GetParam();
+  test_runner.runTest();
+}
+
+/// Test the MHD Einfeldt Strong Rarefaction (Einfeldt et al. 1991)
+TEST_P(tMHDSYSTEMParameterizedMpi, EinfeldtStrongRarefactionCorrectInputExpectCorrectOutput)
+{
+  test_runner.numMpiRanks = GetParam();
+  test_runner.runTest();
+}
+
+/// Test the Brio & Wu Shock Tube (Brio & Wu 1988)
+TEST_P(tMHDSYSTEMParameterizedMpi, BrioAndWuShockTubeCorrectInputExpectCorrectOutput)
+{
+  test_runner.numMpiRanks = GetParam();
+  test_runner.runTest();
+}
+
+/// Test the Dai & Woodward Shock Tube (Dai & Woodward 1998)
+TEST_P(tMHDSYSTEMParameterizedMpi, DaiAndWoodwardShockTubeCorrectInputExpectCorrectOutput)
+{
+  test_runner.numMpiRanks = GetParam();
+  test_runner.runTest();
+}
+
+/// Test the Ryu & Jones 1a Shock Tube (Ryu & Jones 1995)
+TEST_P(tMHDSYSTEMParameterizedMpi, RyuAndJones1aShockTubeCorrectInputExpectCorrectOutput)
+{
+  test_runner.numMpiRanks = GetParam();
+  test_runner.runTest();
+}
+
+/// Test the Ryu & Jones 4d Shock Tube (Ryu & Jones 1995)
+TEST_P(tMHDSYSTEMParameterizedMpi, RyuAndJones4dShockTubeCorrectInputExpectCorrectOutput)
+{
+  test_runner.numMpiRanks = GetParam();
+  // This test is particularly sensitive to minor changes in the initial conditions, the kind of changes that are
+  // expected from compiler to compiler. As such the limits have been loosened slightly.
+  test_runner.setFixedEpsilon(7.3E-12);
+  test_runner.runTest();
+}
+
+/// Test the Advecting Field Loop
+TEST_P(tMHDSYSTEMParameterizedMpi, AdvectingFieldLoopCorrectInputExpectCorrectOutput)
+{
+  test_runner.numMpiRanks = GetParam();
+
+  // Only do the L2 Norm test. The regular cell-to-cell comparison is brittle for this test across systems
+  test_runner.runTest(true, 3.9E-8, 2.25E-6);
+}
+
+/// Test the MHD Blast Wave
+TEST_P(tMHDSYSTEMParameterizedMpi, MhdBlastWaveCorrectInputExpectCorrectOutput)
+{
+  test_runner.numMpiRanks = GetParam();
+
+  // Only do the L2 Norm test. The regular cell-to-cell comparison is brittle for this test across systems
+  test_runner.runTest(true, 2.2E-4, 0.35);
+}
+
+/// Test the Orszag-Tang Vortex
+TEST_P(tMHDSYSTEMParameterizedMpi, OrszagTangVortexCorrectInputExpectCorrectOutput)
+{
+  test_runner.numMpiRanks = GetParam();
+  test_runner.runTest();
+}
+/// @}
+// =============================================================================
+
+// =============================================================================
+// Test Suite: tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization
+// =============================================================================
+/*!
+ * \defgroup tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization
+ * \brief Test the circularly polarized Alfven Wave conditions as a parameterized test with varying polarizations.
+ * Details in Gardiner & Stone 2008
+ *
+ */
+/// @{
+// NOLINTNEXTLINE(readability-identifier-naming)
+class tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization : public ::testing::TestWithParam<double>
+{
+ public:
+  tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization() : cpawTest(false, true, false, false){};
+
+ protected:
+  system_test::SystemTestRunner cpawTest;
+
+  void Set_Launch_Params(double const &polarization, double const &vx)
+  {
+    // Constant for all tests
+    size_t const N      = 32;
+    double const length = 1.5;
+    double const gamma  = 5. / 3.;
+    double const tOut   = 1.0;
+    double const pitch  = std::asin(2. / 3.);
+    double const yaw    = std::asin(2. / std::sqrt(5.));
+
+    // Domain settings
+    double const x_len = 2. * length, y_len = length, z_len = length;
+    int const nx = 2 * N, ny = N, nz = N;
+
+    // Settings
+    cpawTest.chollaLaunchParams.append(" nx=" + to_string_exact<int>(nx));
+    cpawTest.chollaLaunchParams.append(" ny=" + to_string_exact<int>(ny));
+    cpawTest.chollaLaunchParams.append(" nz=" + to_string_exact<int>(nz));
+    cpawTest.chollaLaunchParams.append(" tout=" + to_string_exact<double>(tOut));
+    cpawTest.chollaLaunchParams.append(" outstep=" + to_string_exact<double>(tOut));
+    cpawTest.chollaLaunchParams.append(" init=Circularly_Polarized_Alfven_Wave");
+    cpawTest.chollaLaunchParams.append(" xmin=0.0");
+    cpawTest.chollaLaunchParams.append(" ymin=0.0");
+    cpawTest.chollaLaunchParams.append(" zmin=0.0");
+    cpawTest.chollaLaunchParams.append(" xlen=" + to_string_exact<double>(x_len));
+    cpawTest.chollaLaunchParams.append(" ylen=" + to_string_exact<double>(y_len));
+    cpawTest.chollaLaunchParams.append(" zlen=" + to_string_exact<double>(z_len));
+    cpawTest.chollaLaunchParams.append(" xl_bcnd=1");
+    cpawTest.chollaLaunchParams.append(" xu_bcnd=1");
+    cpawTest.chollaLaunchParams.append(" yl_bcnd=1");
+    cpawTest.chollaLaunchParams.append(" yu_bcnd=1");
+    cpawTest.chollaLaunchParams.append(" zl_bcnd=1");
+    cpawTest.chollaLaunchParams.append(" zu_bcnd=1");
+    cpawTest.chollaLaunchParams.append(" polarization=" + to_string_exact<double>(polarization));
+    cpawTest.chollaLaunchParams.append(" vx=" + to_string_exact<double>(vx));
+    cpawTest.chollaLaunchParams.append(" gamma=" + to_string_exact<double>(gamma));
+    cpawTest.chollaLaunchParams.append(" pitch=" + to_string_exact<double>(pitch));
+    cpawTest.chollaLaunchParams.append(" yaw=" + to_string_exact<double>(yaw));
+  }
+};
+
+// Moving wave with right and left polarization
+// =============================================
+TEST_P(tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization, MovingWaveCorrectInputExpectCorrectOutput)
+{
+  // Get the test parameter
+  double const polarization = GetParam();
+
+  // Set the wave to be moving
+  double const vx = 0.0;
+
+// Set allowed errors
+#ifdef PCM
+  // Set the number of timesteps
+  cpawTest.setFiducialNumTimeSteps(82);
+  double const allowedL1Error = 6.5E-2;  // Based on results in Gardiner & Stone 2008
+  double const allowedError   = 4.6E-2;
+#elif defined(PLMC)
+  // Set the number of timesteps
+  cpawTest.setFiducialNumTimeSteps(84);
+  double const allowedL1Error = 5.0E-3;  // Based on results in Gardiner & Stone 2008
+  double const allowedError   = 5.0E-3;
+#elif defined(PPMC)
+  // Set the number of timesteps
+  cpawTest.setFiducialNumTimeSteps(84);
+  double const allowedL1Error = 4.0E-3;  // Based on results in Gardiner & Stone 2008
+  double const allowedError   = 3.0E-3;
+#elif defined(PLMP)
+  double const allowedL1Error = 5.0E-3;  // Based on results in Gardiner & Stone 2008
+  double const allowedError   = 5.0E-3;
+#elif defined(PPMP)
+  double const allowedL1Error = 4.0E-3;  // Based on results in Gardiner & Stone 2008
+  double const allowedError   = 3.0E-3;
+#endif
+
+  // Set the launch parameters
+  Set_Launch_Params(polarization, vx);
+
+  // Check Results
+  cpawTest.runL1ErrorTest(allowedL1Error, allowedError);
+}
+
+// Standing wave with right and left polarization
+// =============================================
+TEST_P(tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization, StandingWaveCorrectInputExpectCorrectOutput)
+{
+  // Get the test parameter
+  double const polarization = GetParam();
+
+  // Set the wave to be standing
+  double const vx = -polarization;
+
+// Set allowed errors
+#ifdef PCM
+  // Set the number of timesteps
+  cpawTest.setFiducialNumTimeSteps(130);
+  double const allowedL1Error = 1.8E-2;  // Based on results in Gardiner & Stone 2008
+  double const allowedError   = 1.7E-2;
+#elif defined(PLMC)
+  // Set the number of timesteps
+  cpawTest.setFiducialNumTimeSteps(130);
+  double const allowedL1Error = 2.0E-3;  // Based on results in Gardiner & Stone 2008
+  double const allowedError   = 2.0E-3;
+#elif defined(PPMC)
+  // Set the number of timesteps
+  cpawTest.setFiducialNumTimeSteps(130);
+  double const allowedL1Error = 1.3E-3;  // Based on results in Gardiner & Stone 2008
+  double const allowedError   = 1.3E-3;
+#elif defined(PLMP)
+  double const allowedL1Error = 2.0E-3;  // Based on results in Gardiner & Stone 2008
+  double const allowedError   = 2.0E-3;
+#elif defined(PPMP)
+  double const allowedL1Error = 1.3E-3;  // Based on results in Gardiner & Stone 2008
+  double const allowedError   = 1.3E-3;
+#endif
+
+  // Set the launch parameters
+  Set_Launch_Params(polarization, vx);
+
+  // Check Results
+  cpawTest.runL1ErrorTest(allowedL1Error, allowedError);
+}
+
+INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization,
+                         ::testing::Values(1.0, -1.0));
+/// @}
+// =============================================================================
diff --git a/src/system_tests/particles_system_tests.cpp b/src/system_tests/particles_system_tests.cpp
index 7f6d4552e..4b6b36575 100644
--- a/src/system_tests/particles_system_tests.cpp
+++ b/src/system_tests/particles_system_tests.cpp
@@ -20,11 +20,10 @@
  *
  */
 /// @{
-TEST(tPARTICLESSYSTEMSphericalCollapse,
-     CorrectInputExpectCorrectOutput)
+TEST(tPARTICLESSYSTEMSphericalCollapse, DISABLED_CorrectInputExpectCorrectOutput)
 {
-    systemTest::SystemTestRunner collapseTest(true);
-    collapseTest.runTest();
+  system_test::SystemTestRunner collapseTest(true);
+  collapseTest.runTest();
 }
 /// @}
 // =============================================================================
\ No newline at end of file
diff --git a/src/system_tests/system_tester.cpp b/src/system_tests/system_tester.cpp
index c59e6e770..1888fd752 100644
--- a/src/system_tests/system_tester.cpp
+++ b/src/system_tests/system_tester.cpp
@@ -7,19 +7,23 @@
 
 // STL includes
 #include <stdlib.h>
-#include <fstream>
-#include <stdexcept>
-#include <sstream>
-#include <cstdio>
+
 #include <algorithm>
 #include <cmath>
+#include <cstdio>
+#include <filesystem>
+#include <fstream>
+#include <limits>
 #include <numeric>
+#include <sstream>
+#include <stdexcept>
 
 // External Libraries and Headers
 #include <gtest/gtest.h>
 
 // Local includes
-#include "../system_tests/system_tester.h" // Include the header file
+#include "../io/io.h"
+#include "../system_tests/system_tester.h"  // Include the header file
 #include "../utils/testing_utilities.h"
 
 // =============================================================================
@@ -27,363 +31,473 @@
 // =============================================================================
 
 // =============================================================================
-void systemTest::SystemTestRunner::runTest()
+void system_test::SystemTestRunner::runTest(bool const &compute_L2_norm_only, double const &maxAllowedL1Error,
+                                            double const &maxAllowedError)
 {
-    /// Only run if this variable is set to `true`. Generally this and
-    /// globalCompareSystemTestResults should only be used for large MPI / tests
-    /// where the user wishes to separate the execution of cholla and the /
-    /// comparison of results onto different machines/jobs
-    if (globalRunCholla)
-    {
-        // Launch Cholla. Note that this dumps all console output to the console
-        // log file as requested by the user.
-        launchCholla();
+  /// Only run if this variable is set to `true`. Generally this and
+  /// globalCompareSystemTestResults should only be used for large MPI / tests
+  /// where the user wishes to separate the execution of cholla and the /
+  /// comparison of results onto different machines/jobs
+  if (globalRunCholla) {
+    // Launch Cholla. Note that this dumps all console output to the console
+    // log file as requested by the user.
+    launchCholla();
+  }
+
+  /// If set to false then no comparison will be performed. Generally this and
+  /// globalRunCholla should only be used for large MPI tests where the user
+  /// wishes to separate the execution of cholla and the comparison of results
+  /// onto different machines/jobs
+  if (not globalCompareSystemTestResults) {
+    return;
+  }
+
+  // Make sure we have all the required data files and open the test data file
+  _testHydroFieldsFileVec.resize(numMpiRanks);
+  _testParticlesFileVec.resize(numMpiRanks);
+  FnameTemplate fname_template(true, _outputDirectory);
+  for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++) {
+    // Load the hydro data
+    std::string filePath = fname_template.format_fname(1, fileIndex, "");
+    if (_hydroDataExists and std::filesystem::exists(filePath)) {
+      _testHydroFieldsFileVec[fileIndex].openFile(filePath, H5F_ACC_RDONLY);
     }
 
-    /// If set to false then no comparison will be performed. Generally this and
-    /// globalRunCholla should only be used for large MPI tests where the user
-    /// wishes to separate the execution of cholla and the comparison of results
-    /// onto different machines/jobs
-    if (not globalCompareSystemTestResults) return;
-
-    // Make sure we have all the required data files and open the test data file
-    _testHydroFieldsFileVec.resize(numMpiRanks);
-    _testParticlesFileVec.resize(numMpiRanks);
-    for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++)
-    {
-        // Load the hydro data
-        if (_hydroDataExists)
-        {
-            std::string fileName = "/1.h5." + std::to_string(fileIndex);
-            _checkFileExists(_outputDirectory + fileName);
-            _testHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName,
-                                                        H5F_ACC_RDONLY);
-        }
-
-        // Load the particles data
-        if (_particleDataExists)
-        {
-            std::string fileName = "/1_particles.h5." + std::to_string(fileIndex);
-            _checkFileExists(_outputDirectory + fileName);
-            _testParticlesFileVec[fileIndex].openFile(_outputDirectory + fileName,
-                                                      H5F_ACC_RDONLY);
-        }
+    // Load the particles data
+    filePath = fname_template.format_fname(1, fileIndex, "_particles");
+    if (_particleDataExists and std::filesystem::exists(filePath)) {
+      _testParticlesFileVec[fileIndex].openFile(filePath, H5F_ACC_RDONLY);
     }
+  }
 
-    // If this is a particle build then read in the IDs and generate the sorting
-    // vector
-    if (_particleDataExists)
-    {
-        _testParticleIDs     = _loadTestParticleData("particle_IDs");
+  // If this is a particle build then read in the IDs and generate the sorting
+  // vector
+  if (_particleDataExists) {
+    _testParticleIDs = _loadTestParticleData("particle_IDs");
 
-        if (_fiducialFileExists) _fiducialParticleIDs = _loadFiducialParticleData("particle_IDs");
+    if (_fiducialFileExists) {
+      _fiducialParticleIDs = _loadFiducialParticleData("particle_IDs");
     }
-
-    // Get the list of test dataset names
-    if (_hydroDataExists)
-        _testDataSetNames = _findDataSetNames(_testHydroFieldsFileVec[0]);
-    if (_particleDataExists)
-    {
-        // Load the data, replace the density value with the new name, then append
-        std::vector<std::string> particleNames = _findDataSetNames(_testParticlesFileVec[0]);
-        auto iter = std::find(particleNames.begin(), particleNames.end(), "density");
-        *iter = "particle_density";
-
-        _testDataSetNames.insert(_testDataSetNames.end(),
-                                 particleNames.begin(),
-                                 particleNames.end());
+  }
+
+  // Get the list of test dataset names
+  if (_hydroDataExists) {
+    _testDataSetNames = _findDataSetNames(_testHydroFieldsFileVec[0]);
+  }
+  if (_particleDataExists) {
+    // Load the data, replace the density value with the new name, then append
+    std::vector<std::string> particleNames = _findDataSetNames(_testParticlesFileVec[0]);
+    auto iter                              = std::find(particleNames.begin(), particleNames.end(), "density");
+    *iter                                  = "particle_density";
+
+    _testDataSetNames.insert(_testDataSetNames.end(), particleNames.begin(), particleNames.end());
+  }
+
+  // Start Performing Checks
+  // =======================
+  // Check the number of time steps
+  if (_compareNumTimeSteps) {
+    _checkNumTimeSteps();
+  }
+
+  // Check that the test file has as many, or more, datasets than the fiducial
+  // file. Provide a warning if the datasets are not the same size
+  EXPECT_GE(_testDataSetNames.size(), _fiducialDataSetNames.size())
+      << std::endl
+      << "Warning: The test data has " << _testDataSetNames.size() << " datasets and the fiducial data has "
+      << _fiducialDataSetNames.size() << " datasets" << std::endl
+      << std::endl;
+
+  // Compute the L1 Error.
+  L2Norm_         = 0;
+  double maxError = 0;
+  // Loop over the datasets to be tested
+  for (auto const &dataSetName : _fiducialDataSetNames) {
+    // check that the test data has the dataset in it
+    ASSERT_EQ(std::count(_testDataSetNames.begin(), _testDataSetNames.end(), dataSetName), 1)
+        << "The test data does not contain the dataset '" + dataSetName + "' or contains it more than once.";
+
+    // Get data vectors
+    std::vector<size_t> testDims(3, 1);
+    std::vector<double> testData;
+    std::vector<double> fiducialData;
+    // This is just a vector of all the different dataset names for
+    // particles to help choose whether to call _loadTestParticleData
+    // or loadTestFieldData
+    std::vector<std::string> particleIDs = {"particle_IDs", "pos_x", "pos_y", "pos_z", "vel_x", "vel_y", "vel_z"};
+    if (std::find(particleIDs.begin(), particleIDs.end(), dataSetName) != particleIDs.end()) {
+      // This is a particle data set
+
+      // Set some basic parameters
+      testDims[0] = _testTotalNumParticles;
+
+      // Load in the data. Note the special handling for particle_IDs
+      if (dataSetName == "particle_IDs") {
+        testData     = _testParticleIDs;
+        fiducialData = _fiducialParticleIDs;
+      } else {
+        testData     = _loadTestParticleData(dataSetName);
+        fiducialData = _loadFiducialParticleData(dataSetName);
+      }
+    } else {
+      // This is a field data set
+      testData = loadTestFieldData(dataSetName, testDims);
+      // Get fiducial data
+      fiducialData = _loadFiducialFieldData(dataSetName);
     }
 
-    // Start Performing Checks
-    // =======================
-    // Check the number of time steps
-    if (_compareNumTimeSteps) _checkNumTimeSteps();
-
-    // Check that the test file has as many, or more, datasets than the fiducial
-    // file. Provide a warning if the datasets are not the same size
-    EXPECT_GE(_testDataSetNames.size(), _fiducialDataSetNames.size())
-        << std::endl
-        << "Warning: The test data has "
-        << _testDataSetNames.size()
-        <<  " datasets and the fiducial data has "
-        << _fiducialDataSetNames.size()
-        << " datasets" << std::endl << std::endl;
-
-    // Loop over the datasets to be tested
-    for (auto dataSetName: _fiducialDataSetNames)
-    {
-        // check that the test data has the dataset in it
-        ASSERT_EQ(std::count(_testDataSetNames.begin(), _testDataSetNames.end(), dataSetName), 1)
-            << "The test data does not contain the dataset '" + dataSetName
-            + "' or contains it more than once.";
-
-        // Get data vectors
-        std::vector<size_t> testDims(3,1);
-        std::vector<double> testData;
-        std::vector<double> fiducialData;
-        // This is just a vector of all the different dataset names for
-        // particles to help choose whether to call _loadTestParticleData
-        // or loadTestFieldData
-        std::vector<std::string> particleIDs = {"particle_IDs",
-                                                "pos_x",
-                                                "pos_y",
-                                                "pos_z",
-                                                "vel_x",
-                                                "vel_y",
-                                                "vel_z"};
-        if (std::find(particleIDs.begin(), particleIDs.end(), dataSetName)
-            != particleIDs.end())
-        {
-            // This is a particle data set
-
-            // Set some basic parameters
-            testDims[0] = _testTotalNumParticles;
-
-            // Load in the data. Note the special handling for particle_IDs
-            if (dataSetName == "particle_IDs")
-            {
-                testData     = _testParticleIDs;
-                fiducialData = _fiducialParticleIDs;
-            }
-            else
-            {
-                testData     = _loadTestParticleData(dataSetName);
-                fiducialData = _loadFiducialParticleData(dataSetName);
-            }
-        }
-        else
-        {
-            // This is a field data set
-            testData = loadTestFieldData(dataSetName,
-                                          testDims);
-            // Get fiducial data
-            fiducialData = _loadFiducialFieldData(dataSetName);
+    // Check that they're the same length
+    ASSERT_EQ(fiducialData.size(), testData.size())
+        << "The fiducial and test '" << dataSetName << "' datasets are not the same length";
+
+    // Compare values
+    double L1_error     = 0.0;
+    double fp_sum_error = 0.0;
+    for (size_t i = 0; i < testDims[0]; i++) {
+      for (size_t j = 0; j < testDims[1]; j++) {
+        for (size_t k = 0; k < testDims[2]; k++) {
+          size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k;
+
+          if (compute_L2_norm_only) {
+            double const diff = std::abs(fiducialData.at(index) - testData.at(index));
+
+            maxError = std::max(maxError, diff);
+
+            // Perform a Kahan sum to maintain precision in the result
+            double const y = diff - fp_sum_error;
+            double const t = L1_error + y;
+            fp_sum_error   = (t - L1_error) - y;
+            L1_error       = t;
+          } else {
+            // Check for equality and iff not equal return difference
+            double absoluteDiff;
+            int64_t ulpsDiff;
+            bool areEqual = testing_utilities::nearlyEqualDbl(fiducialData.at(index), testData.at(index), absoluteDiff,
+                                                              ulpsDiff, _fixedEpsilon);
+            ASSERT_TRUE(areEqual) << std::endl
+                                  << "Difference in " << dataSetName << " dataset at [" << i << "," << j << "," << k
+                                  << "]" << std::endl
+                                  << "The fiducial value is:       " << fiducialData[index] << std::endl
+                                  << "The test value is:           " << testData[index] << std::endl
+                                  << "The absolute difference is:  " << absoluteDiff << std::endl
+                                  << "The ULP difference is:       " << ulpsDiff << std::endl;
+          }
         }
+      }
+    }
 
-        // Check that they're the same length
-        ASSERT_EQ(fiducialData.size(), testData.size())
-                                        << "The fiducial and test '"
-                                        << dataSetName
-                                        << "' datasets are not the same length";
-
-        // Compare values
-        for (size_t i = 0; i < testDims[0]; i++)
-        {
-            for (size_t j = 0; j < testDims[1]; j++)
-            {
-                for (size_t k = 0; k < testDims[2]; k++)
-                {
-                    size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k;
-
-                    // Check for equality and iff not equal return difference
-                    double absoluteDiff;
-                    int64_t ulpsDiff;
-                    // Fixed epsilon is changed from the default since AMD/Clang
-                    // appear to differ from NVIDIA/GCC/XL by roughly 1E-12
-                    double fixedEpsilon = 5.0E-12;
-                    bool areEqual = testingUtilities::nearlyEqualDbl(fiducialData.at(index),
-                                                                     testData.at(index),
-                                                                     absoluteDiff,
-                                                                     ulpsDiff,
-                                                                     fixedEpsilon);
-                    ASSERT_TRUE(areEqual)
-                        << std::endl
-                        << "Difference in "
-                        << dataSetName
-                        << " dataset at ["
-                        << i << "," << j << "," << k <<"]" << std::endl
-                        << "The fiducial value is:       " << fiducialData[index] << std::endl
-                        << "The test value is:           " << testData[index]     << std::endl
-                        << "The absolute difference is:  " << absoluteDiff        << std::endl
-                        << "The ULP difference is:       " << ulpsDiff            << std::endl;
-                }
-            }
-        }
+    if (compute_L2_norm_only) {
+      L1_error /= static_cast<double>(testDims[0] * testDims[1] * testDims[2]);
+      L2Norm_ += L1_error * L1_error;
     }
+  }
+
+  if (compute_L2_norm_only) {
+    // Check the L2 Norm
+    L2Norm_ = std::sqrt(L2Norm_);
+    EXPECT_LT(L2Norm_, maxAllowedL1Error) << "the norm of the L1 error vector has exceeded the allowed value";
+
+    // Check the Max Error
+    EXPECT_LT(maxError, maxAllowedError) << "The maximum error has exceeded the allowed value";
+  }
 }
 // =============================================================================
 
 // =============================================================================
-void systemTest::SystemTestRunner::launchCholla()
+void system_test::SystemTestRunner::runL1ErrorTest(double const &maxAllowedL1Error, double const &maxAllowedError)
 {
+  /// Only run if this variable is set to `true`. Generally this and
+  /// globalCompareSystemTestResults should only be used for large MPI / tests
+  /// where the user wishes to separate the execution of cholla and the /
+  /// comparison of results onto different machines/jobs
+  if (globalRunCholla) {
     // Launch Cholla. Note that this dumps all console output to the console
     // log file as requested by the user.
-    std::string const chollaRunCommand = globalMpiLauncher.getString() + " "
-                                        + std::to_string(numMpiRanks) + " "
-                                        + _chollaPath + " "
-                                        + _chollaSettingsPath + " "
-                                        + chollaLaunchParams + " "
-                                        + "outdir=" + _outputDirectory + "/"
-                                        + " >> " + _consoleOutputPath + " 2>&1 ";
-    auto returnEcho = system(("echo Launch Command: " + chollaRunCommand + " >> " + _consoleOutputPath).c_str());
-    auto returnLaunch = system((chollaRunCommand).c_str());
-    EXPECT_EQ(returnEcho, 0)
-        << "Warning: Echoing the launch command to the console output file "
-        << "returned a non-zero exit status code. Launch command is `"
-        << chollaRunCommand <<  "`" << std::endl;
-    EXPECT_EQ(returnLaunch, 0)
-        << "Warning: Launching Cholla returned a non-zero exit status. Likely "
-        << "failed to launch. Please see the log files" << std::endl;
-
-    _safeMove("run_output.log", _outputDirectory);
-    // TODO: instead of commenting out, change to check if exist
-    //_safeMove("run_timing.log", _outputDirectory);
+    launchCholla();
+  }
+
+  // Check that there is hydro data and no particle data
+  if (_particleDataExists) {
+    std::string errMessage = "Error: SystemTestRunner::runL1ErrorTest does not support particles";
+    throw std::runtime_error(errMessage);
+  }
+  if (not _hydroDataExists) {
+    std::string errMessage = "Error: SystemTestRunner::runL1ErrorTest requires hydro data";
+    throw std::runtime_error(errMessage);
+  }
+
+  /// If set to false then no comparison will be performed. Generally this and
+  /// globalRunCholla should only be used for large MPI tests where the user
+  /// wishes to separate the execution of cholla and the comparison of results
+  /// onto different machines/jobs
+  if (not globalCompareSystemTestResults) {
+    return;
+  }
+
+  // Make sure we have all the required data files and open the data files
+  _testHydroFieldsFileVec.resize(numMpiRanks);
+  std::vector<H5::H5File> initialHydroFieldsFileVec(numMpiRanks);
+  FnameTemplate fname_template(true, _outputDirectory);
+  for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++) {
+    // Initial time data
+    std::string filePath = fname_template.format_fname(0, fileIndex, "");
+    if (std::filesystem::exists(filePath)) {
+      initialHydroFieldsFileVec[fileIndex].openFile(filePath, H5F_ACC_RDONLY);
+    }
+
+    // Final time data
+    filePath = fname_template.format_fname(1, fileIndex, "");
+    if (std::filesystem::exists(filePath)) {
+      _testHydroFieldsFileVec[fileIndex].openFile(filePath, H5F_ACC_RDONLY);
+    }
+  }
+
+  // Get the list of test dataset names
+  _fiducialDataSetNames = _findDataSetNames(initialHydroFieldsFileVec[0]);
+  _testDataSetNames     = _findDataSetNames(_testHydroFieldsFileVec[0]);
+
+  // Start Performing Checks
+  // =======================
+  // Check the number of time steps
+  if (_compareNumTimeSteps) {
+    _checkNumTimeSteps();
+  }
+
+  // Check that the test file has as many, or more, datasets than the fiducial
+  // file. Provide a warning if the datasets are not the same size
+  EXPECT_GE(_testDataSetNames.size(), _fiducialDataSetNames.size())
+      << std::endl
+      << "Warning: The test data has " << _testDataSetNames.size() << " datasets and the fiducial data has "
+      << _fiducialDataSetNames.size() << " datasets" << std::endl
+      << std::endl;
+
+  // Loop over the datasets to be tested
+  L2Norm_         = 0;
+  double maxError = 0;
+  for (auto const &dataSetName : _fiducialDataSetNames) {
+    if (dataSetName == "GasEnergy") {
+      continue;
+    }
+
+    // check that the test data has the dataset in it
+    ASSERT_EQ(std::count(_testDataSetNames.begin(), _testDataSetNames.end(), dataSetName), 1)
+        << "The test data does not contain the dataset '" + dataSetName + "' or contains it more than once.";
+
+    // Get data vectors
+    std::vector<size_t> initialDims(3, 1);
+    std::vector<double> initialData;
+    std::vector<size_t> finalDims(3, 1);
+    std::vector<double> finalData;
+
+    // This is a field data set
+    initialData = loadTestFieldData(dataSetName, initialDims, initialHydroFieldsFileVec);
+    // Get fiducial data
+    finalData = loadTestFieldData(dataSetName, finalDims, _testHydroFieldsFileVec);
+
+    // Check that they're the same length
+    ASSERT_EQ(initialData.size(), finalData.size())
+        << "The initial and final '" << dataSetName << "' datasets are not the same length";
+
+    // Compute the L1 Error.
+    double L1_error     = 0.0;
+    double fp_sum_error = 0.0;
+    for (size_t i = 0; i < initialData.size(); i++) {
+      double const diff = std::abs(initialData.at(i) - finalData.at(i));
+
+      maxError = std::max(maxError, diff);
+
+      // Perform a Kahan sum to maintain precision in the result
+      double const y = diff - fp_sum_error;
+      double const t = L1_error + y;
+      fp_sum_error   = (t - L1_error) - y;
+      L1_error       = t;
+    }
+
+    L1_error /= static_cast<double>(initialDims[0] * initialDims[1] * initialDims[2]);
+    L2Norm_ += L1_error * L1_error;
+
+    // Perform the correctness check
+    EXPECT_LT(L1_error, maxAllowedL1Error)
+        << "the L1 error for the " << dataSetName << " data has exceeded the allowed value";
+  }
+
+  // Check the L2 Norm
+  L2Norm_ = std::sqrt(L2Norm_);
+  EXPECT_LT(L2Norm_, maxAllowedL1Error) << "the norm of the L1 error vector has exceeded the allowed value";
+
+  // Check the Max Error
+  EXPECT_LT(maxError, maxAllowedError) << "The maximum error has exceeded the allowed value";
 }
 // =============================================================================
 
 // =============================================================================
-void systemTest::SystemTestRunner::openHydroTestData()
+void system_test::SystemTestRunner::launchCholla()
 {
-   _testHydroFieldsFileVec.resize(numMpiRanks);
-    for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++)
-    {
-      std::string fileName = "/1.h5." + std::to_string(fileIndex);
-      _checkFileExists(_outputDirectory + fileName);
-      _testHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName,
-                                                        H5F_ACC_RDONLY);
-    }
+  // Launch Cholla. Note that this dumps all console output to the console
+  // log file as requested by the user.
+  std::string const chollaRunCommand = globalMpiLauncher.getString() + " " + std::to_string(numMpiRanks) + " " +
+                                       _chollaPath + " " + _chollaSettingsPath + " " + chollaLaunchParams + " " +
+                                       "outdir=" + _outputDirectory + "/" + " >> " + _consoleOutputPath + " 2>&1 ";
+  auto returnEcho   = system(("echo Launch Command: " + chollaRunCommand + " >> " + _consoleOutputPath).c_str());
+  auto returnLaunch = system((chollaRunCommand).c_str());
+  EXPECT_EQ(returnEcho, 0) << "Warning: Echoing the launch command to the console output file "
+                           << "returned a non-zero exit status code. Launch command is `" << chollaRunCommand << "`"
+                           << std::endl;
+  EXPECT_EQ(returnLaunch, 0) << "Warning: Launching Cholla returned a non-zero exit status. Likely "
+                             << "failed to launch. Please see the log files" << std::endl;
+
+  // Move the output files to the correct spots
+  std::filesystem::rename(::globalChollaRoot.getString() + "/run_output.log", _outputDirectory + "/run_output.log");
+  try {
+    std::filesystem::rename(::globalChollaRoot.getString() + "/run_timing.log", _outputDirectory + "/run_timing.log");
+  } catch (const std::filesystem::filesystem_error &error) {
+    // This file might not exist and isn't required so don't worry if it doesn't exist
+  }
 }
 // =============================================================================
 
 // =============================================================================
-void systemTest::SystemTestRunner::setFiducialData(std::string const &fieldName,
-                                                   std::vector<double> const &dataVec)
+void system_test::SystemTestRunner::openHydroTestData()
 {
-    // First check if there's a fiducial data file
-    if (_fiducialFileExists)
-    {
-        std::string errMessage = "Error: Fiducial data file already exists for test '"
-                                 + _fullTestFileName
-                                 + "' and cannot be overwritten.";
-        throw std::runtime_error(errMessage);
+  _testHydroFieldsFileVec.resize(numMpiRanks);
+  for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++) {
+    std::string filePath = FnameTemplate(true, _outputDirectory).format_fname(1, fileIndex, "");
+    if (std::filesystem::exists(filePath)) {
+      _testHydroFieldsFileVec[fileIndex].openFile(filePath, H5F_ACC_RDONLY);
     }
+  }
+}
+// =============================================================================
 
-    // Put new vector into map
-    _fiducialDataSets[fieldName] = dataVec;
+// =============================================================================
+void system_test::SystemTestRunner::setFiducialData(std::string const &fieldName, std::vector<double> const &dataVec)
+{
+  // First check if there's a fiducial data file
+  if (_fiducialDataSets.count(fieldName) > 0) {
+    std::string errMessage =
+        "Error: Fiducial dataset for field '" + fieldName + "' already exists and cannot be overwritten";
+    throw std::runtime_error(errMessage);
+  }
+
+  // Put new vector into map
+  _fiducialDataSets[fieldName] = dataVec;
 }
 // =============================================================================
 
 // =============================================================================
-std::vector<double> systemTest::SystemTestRunner::generateConstantData(
-                                                        double const &value,
-                                                        size_t const &nx,
-                                                        size_t const &ny,
-                                                        size_t const &nz)
+std::vector<double> system_test::SystemTestRunner::generateConstantData(double const &value, size_t const &nx,
+                                                                        size_t const &ny, size_t const &nz)
 {
-    size_t const length = nx*ny*nz;
-    std::vector<double> outVec(length);
-    for (size_t i = 0; i < length; i++)
-    {
-        outVec[i] = value;
-    }
-    return outVec;
+  size_t const length = nx * ny * nz;
+  std::vector<double> outVec(length);
+  for (size_t i = 0; i < length; i++) {
+    outVec[i] = value;
+  }
+  return outVec;
 }
 // =============================================================================
 
 // =============================================================================
-std::vector<double> systemTest::SystemTestRunner::generateSineData(
-                                                        double const &offset,
-                                                        double const &amplitude,
-                                                        double const &kx,
-                                                        double const &ky,
-                                                        double const &kz,
-                                                        double const &phase,
-                                                        size_t const &nx,
-                                                        size_t const &ny,
-                                                        size_t const &nz)
+std::vector<double> system_test::SystemTestRunner::generateSineData(double const &offset, double const &amplitude,
+                                                                    double const &kx, double const &ky,
+                                                                    double const &kz, double const &phase,
+                                                                    size_t const &nx, size_t const &ny,
+                                                                    size_t const &nz)
 {
-    size_t const length = nx*ny*nz;
-    std::vector<double> outVec(length);
-    for (size_t i = 0; i < nx; i++)
-    {
-        for (size_t j = 0; j < ny; j++)
-        {
-            for (size_t k = 0; k < nz; k++)
-            {
-                double value = offset + amplitude
-                               * std::sin(kx*i + ky*j + kz*k + phase);
-
-                size_t index = (i * ny * nz) + (j * nz) + k;
-                outVec[index] = value;
-            }
-        }
+  size_t const length = nx * ny * nz;
+  std::vector<double> outVec(length);
+  for (size_t i = 0; i < nx; i++) {
+    for (size_t j = 0; j < ny; j++) {
+      for (size_t k = 0; k < nz; k++) {
+        double value = offset + amplitude * std::sin(kx * i + ky * j + kz * k + phase);
+
+        size_t index  = (i * ny * nz) + (j * nz) + k;
+        outVec[index] = value;
+      }
     }
-    return outVec;
+  }
+  return outVec;
 }
 // =============================================================================
 
 // =============================================================================
 // Constructor
-systemTest::SystemTestRunner::SystemTestRunner(bool const &particleData,
-                                               bool const &hydroData,
-                                               bool const &useFiducialFile,
-                                               bool const &useSettingsFile)
-    :
-    _particleDataExists(particleData),
-    _hydroDataExists(hydroData)
+system_test::SystemTestRunner::SystemTestRunner(bool const &particleData, bool const &hydroData,
+                                                bool const &useFiducialFile, bool const &useSettingsFile)
+    : _particleDataExists(particleData), _hydroDataExists(hydroData)
 {
-    // Get the test name, with and underscore instead of a "." since
-    // we're actually generating file names
-    const ::testing::TestInfo* const test_info = ::testing::UnitTest::GetInstance()->current_test_info();
-    std::stringstream nameStream;
-    std::string suiteName = test_info->test_suite_name();
-    suiteName = suiteName.substr(suiteName.find("/")+1, suiteName.length());
-    nameStream << suiteName << "_" << test_info->name();
-    std::string fullTestName = nameStream.str();
-    _fullTestFileName = fullTestName.substr(0, fullTestName.find("/"));
-
-    // Generate the input paths. Strip out everything after a "/" since that
-    // probably indicates a parameterized test
-    _chollaPath         = ::globalChollaRoot.getString()
-                          + "/bin/cholla."
-                          + ::globalChollaBuild.getString()
-                          + "." + ::globalChollaMachine.getString();
-    _chollaSettingsPath = ::globalChollaRoot.getString()
-                          + "/src/system_tests/input_files/"
-                          + _fullTestFileName + ".txt";
-    _fiducialFilePath   = ::globalChollaRoot.getString()
-                          + "/cholla-tests-data/system_tests/"
-                          + _fullTestFileName + ".h5";
-
-    // Generate output paths, these files don't exist yet
-    _outputDirectory    = ::globalChollaRoot.getString() + "/bin/" + fullTestName;
-    _consoleOutputPath  = _outputDirectory + "/" + _fullTestFileName + "_console.log";
-
-    // Create the new directory and check that it exists
-    // TODO: C++17: When we update to C++17 or newer this section should
-    // TODO: use std::filesystem to create the directory and check that
-    // TODO: it exists
-    if (system(("mkdir --parents " + _outputDirectory).c_str()) != 0)
-    {
-        std::cerr << "Warning: Directory '"
-                      + _outputDirectory
-                      + "' either already exists or could not be created."
-                      << std::endl;
+  // Get the test name, with and underscore instead of a "." since
+  // we're actually generating file names
+  const ::testing::TestInfo *const test_info = ::testing::UnitTest::GetInstance()->current_test_info();
+  std::stringstream nameStream;
+  std::string suiteName = test_info->test_suite_name();
+  suiteName             = suiteName.substr(suiteName.find('/') + 1, suiteName.length());
+  nameStream << suiteName << "_" << test_info->name();
+  std::string fullTestName = nameStream.str();
+  _fullTestFileName        = fullTestName.substr(0, fullTestName.find('/'));
+
+  // Generate the input paths. Strip out everything after a "/" since that
+  // probably indicates a parameterized test.
+  _chollaPath = ::globalChollaRoot.getString() + "/bin/cholla." + ::globalChollaBuild.getString() + "." +
+                ::globalChollaMachine.getString();
+
+  // Check that Cholla exists and abort if it doesn't
+  if (not std::filesystem::exists(_chollaPath)) {
+    throw std::invalid_argument("Error: Cholla executable not found.");
+  }
+
+  // Check that settings file exist
+  if (useSettingsFile) {
+    _chollaSettingsPath =
+        ::globalChollaRoot.getString() + "/src/system_tests/input_files/" + _fullTestFileName + ".txt";
+  } else {
+    _chollaSettingsPath = ::globalChollaRoot.getString() + "/src/system_tests/input_files/" + "blank_settings_file.txt";
+  }
+  if (not std::filesystem::exists(_chollaSettingsPath)) {
+    throw std::invalid_argument("Error: Cholla settings file not found at :" + _chollaSettingsPath);
+  }
+
+  // Check that the fiducial file exists and load it if it does
+  if (useFiducialFile) {
+    _fiducialFilePath = ::globalChollaRoot.getString() + "/cholla-tests-data/system_tests/" + _fullTestFileName + ".h5";
+    if (not std::filesystem::exists(_fiducialFilePath)) {
+      throw std::invalid_argument("Error: Cholla fiducial data file not found at :" + _fiducialFilePath);
     }
-
-    // Check that the files exist and load fiducial HDF5 file if required
-    _checkFileExists(_chollaPath);
-    if (useSettingsFile) _checkFileExists(_chollaSettingsPath);
-    if (useFiducialFile)
-    {
-        _checkFileExists(_fiducialFilePath);
-        _fiducialFile.openFile(_fiducialFilePath, H5F_ACC_RDONLY);
-        _fiducialDataSetNames = _findDataSetNames(_fiducialFile);
-        _fiducialFileExists   = true;
-    };
+    _fiducialFile.openFile(_fiducialFilePath, H5F_ACC_RDONLY);
+    _fiducialDataSetNames = _findDataSetNames(_fiducialFile);
+    _fiducialFileExists   = true;
+  } else {
+    _fiducialFilePath = "";
+  }
+
+  // Generate output paths, these files don't exist yet
+  _outputDirectory   = ::globalChollaRoot.getString() + "/bin/" + fullTestName;
+  _consoleOutputPath = _outputDirectory + "/" + _fullTestFileName + "_console.log";
+
+  // Create the new directory and check that it exists
+  // TODO: C++17: When we update to C++17 or newer this section should
+  // TODO: use std::filesystem to create the directory and check that
+  // TODO: it exists
+  if (system(("mkdir --parents " + _outputDirectory).c_str()) != 0) {
+    std::cerr << "Warning: Directory '" + _outputDirectory + "' either already exists or could not be created."
+              << std::endl;
+  }
 }
 // =============================================================================
 
 // =============================================================================
 // Destructor
-systemTest::SystemTestRunner::~SystemTestRunner()
+system_test::SystemTestRunner::~SystemTestRunner()
 {
-    _fiducialFile.close();
-    for (size_t i = 0; i < _testHydroFieldsFileVec.size(); i++)
-    {
-        if (_hydroDataExists) _testHydroFieldsFileVec[i].close();
-        if (_particleDataExists) _testParticlesFileVec[i].close();
+  _fiducialFile.close();
+  for (size_t i = 0; i < _testHydroFieldsFileVec.size(); i++) {
+    if (_hydroDataExists) {
+      _testHydroFieldsFileVec[i].close();
+    }
+    if (_particleDataExists) {
+      _testParticlesFileVec[i].close();
     }
+  }
 }
 // =============================================================================
 
@@ -392,331 +506,270 @@ systemTest::SystemTestRunner::~SystemTestRunner()
 // =============================================================================
 
 // =============================================================================
-void systemTest::SystemTestRunner::_checkFileExists(std::string const &filePath)
+void system_test::SystemTestRunner::_checkNumTimeSteps()
 {
-    // TODO C++17 std::filesystem does this better
-    std::fstream file;
-    file.open(filePath);
-    if (not file)
-    {
-        std::string errMessage = "Error: File '" + filePath + "' not found.";
-        throw std::invalid_argument(errMessage);
-    }
-}
+  int fiducialNSteps, testNSteps;
+
+  H5::Attribute tStepAttr;
+  if (_hydroDataExists) {
+    tStepAttr = _testHydroFieldsFileVec[0].openAttribute("n_step");
+  } else if (_particleDataExists) {
+    tStepAttr = _testParticlesFileVec[0].openAttribute("n_step");
+  } else {
+    std::string errMessage = "Error: Both hydro and particle data are  turned off.";
+    throw std::invalid_argument(errMessage);
+  }
+
+  tStepAttr.read(H5::PredType::NATIVE_INT, &testNSteps);
+
+  if (_fiducialFileExists) {
+    tStepAttr = _fiducialFile.openAttribute("n_step");
+    tStepAttr.read(H5::PredType::NATIVE_INT, &fiducialNSteps);
+  } else {
+    fiducialNSteps = _numFiducialTimeSteps;
+  }
+
+  EXPECT_EQ(fiducialNSteps, testNSteps) << "The number of time steps is not equal";
+};
 // =============================================================================
 
 // =============================================================================
-void systemTest::SystemTestRunner::_safeMove(std::string const &sourcePath,
-                                             std::string const &destinationDirectory)
+std::vector<double> system_test::SystemTestRunner::loadTestFieldData(std::string dataSetName,
+                                                                     std::vector<size_t> &testDims,
+                                                                     std::vector<H5::H5File> file)
 {
-    // TODO C++17 std::filesystem does this better
-    _checkFileExists(sourcePath);
-    if(std::rename(sourcePath.c_str(), (destinationDirectory + "/" + sourcePath).c_str()) < 0)
-    {
-        std::string errMessage = "Error: File '"
-                                    + sourcePath
-                                    + "' could not be moved to '"
-                                    + destinationDirectory
-                                    + "`";
-        throw std::invalid_argument(errMessage);
+  // Switch which fileset we're using if it's a particle dataset
+  if (dataSetName == "particle_density") {
+    file        = _testParticlesFileVec;
+    dataSetName = "density";
+  } else if (file.empty()) {
+    file = _testHydroFieldsFileVec;
+  }
+
+  // Get the size of each dimension. Check if the field is a magnetic
+  // field or not to make sure we're retreiving the right dimensions
+  H5::Attribute dimensions = file[0].openAttribute("dims");
+  dimensions.read(H5::PredType::NATIVE_ULONG, testDims.data());
+
+  if (dataSetName == "magnetic_x") {
+    testDims.at(0)++;
+  } else if (dataSetName == "magnetic_y") {
+    testDims.at(1)++;
+  } else if (dataSetName == "magnetic_z") {
+    testDims.at(2)++;
+  }
+
+  // Allocate the vector and initialize to a quiet NaN to make failed writes clearer
+  std::vector<double> testData(testDims[0] * testDims[1] * testDims[2], std::numeric_limits<double>::quiet_NaN());
+
+  for (size_t rank = 0; rank < numMpiRanks; rank++) {
+    // Open the dataset
+    H5::DataSet const testDataSet = file[rank].openDataSet(dataSetName);
+
+    // Determine dataset size/shape and check that it's correct
+    H5::DataSpace const testDataSpace = testDataSet.getSpace();
+
+    std::vector<hsize_t> tempDims{1, 1, 1};
+    int numTestDims = testDataSpace.getSimpleExtentDims(tempDims.data());
+
+    // Allocate vectors, Note that I'm casting everything to double. Some
+    // of the vectors are ints in the HDF5 file and if the casting
+    // becomes an issue we can fix it later
+    std::vector<double> tempArr(tempDims[0] * tempDims[1] * tempDims[2]);
+
+    // Read in data
+    testDataSet.read(tempArr.data(), H5::PredType::NATIVE_DOUBLE);
+
+    // Get offset
+    std::vector<int> offset(3, 1);
+    H5::Attribute offsetAttr = file[rank].openAttribute("offset");
+    offsetAttr.read(H5::PredType::NATIVE_INT, offset.data());
+
+    // Get dims_local
+    std::vector<int> dimsLocal(3, 1);
+    H5::Attribute dimsLocalAttr = file[rank].openAttribute("dims_local");
+    dimsLocalAttr.read(H5::PredType::NATIVE_INT, dimsLocal.data());
+
+    if (dataSetName == "magnetic_x") {
+      dimsLocal.at(0)++;
+    } else if (dataSetName == "magnetic_y") {
+      dimsLocal.at(1)++;
+    } else if (dataSetName == "magnetic_z") {
+      dimsLocal.at(2)++;
     }
-}
-// =============================================================================
-
-// =============================================================================
-void systemTest::SystemTestRunner::_checkNumTimeSteps()
-{
-    int fiducialNSteps, testNSteps;
 
-    H5::Attribute tStepAttr;
-    if (_hydroDataExists)
-    {
-        tStepAttr = _testHydroFieldsFileVec[0].openAttribute("n_step");
-    }
-    else if (_particleDataExists)
-    {
-        tStepAttr = _testParticlesFileVec[0].openAttribute("n_step");
-    }
-    else
-    {
-        std::string errMessage = "Error: Both hydro and particle data are  turned off.";
-        throw std::invalid_argument(errMessage);
-    }
+    // Now we add the data to the larger vector
+    size_t localIndex = 0;
+    for (size_t i = offset[0]; i < offset[0] + dimsLocal[0]; i++) {
+      for (size_t j = offset[1]; j < offset[1] + dimsLocal[1]; j++) {
+        for (size_t k = offset[2]; k < offset[2] + dimsLocal[2]; k++) {
+          // Compute the location to put the next element
+          size_t overallIndex = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k;
 
-    tStepAttr.read(H5::PredType::NATIVE_INT, &testNSteps);
+          // Perform copy
+          testData[overallIndex] = tempArr[localIndex];
 
-    if (_fiducialFileExists)
-    {
-        tStepAttr = _fiducialFile.openAttribute("n_step");
-        tStepAttr.read(H5::PredType::NATIVE_INT, &fiducialNSteps);
-    }
-    else
-    {
-        fiducialNSteps = _numFiducialTimeSteps;
+          // Increment local index
+          localIndex++;
+        }
+      }
     }
+  }
 
-    EXPECT_EQ(fiducialNSteps, testNSteps)
-              << "The number of time steps is not equal";
-};
+  // Return the entire, concatenated, dataset
+  return testData;
+}
 // =============================================================================
 
 // =============================================================================
-std::vector<double> systemTest::SystemTestRunner::loadTestFieldData(
-        std::string dataSetName,
-        std::vector<size_t> &testDims)
+std::vector<double> system_test::SystemTestRunner::_loadTestParticleData(std::string const &dataSetName)
 {
-    // Get the file we're using
-    std::vector<H5::H5File> file;
-    if (dataSetName == "particle_density")
-    {
-        file = _testParticlesFileVec;
-        dataSetName = "density";
-    }
-    else
-    {
-        file = _testHydroFieldsFileVec;
-    }
-
-    // Get the size of each dimension
-    H5::Attribute dimensions = file[0].openAttribute("dims");
-    dimensions.read(H5::PredType::NATIVE_ULONG, testDims.data());
-
-    // Allocate the vector
-    std::vector<double> testData(testDims[0] * testDims[1] * testDims[2]);
-
-    for (size_t rank = 0; rank < numMpiRanks; rank++)
-    {
-        // Open the dataset
-        H5::DataSet const testDataSet = file[rank].openDataSet(dataSetName);
-
-        // Determine dataset size/shape and check that it's correct
-        H5::DataSpace const testDataSpace = testDataSet.getSpace();
-
-        std::vector<hsize_t> tempDims{1,1,1};
-        int numTestDims = testDataSpace.getSimpleExtentDims(tempDims.data());
-
-        // Allocate vectors, Note that I'm casting everything to double. Some
-        // of the vectors are ints in the HDF5 file and if the casting
-        // becomes an issue we can fix it later
-        std::vector<double> tempArr(tempDims[0] * tempDims[1] * tempDims[2]);
-
-        // Read in data
-        testDataSet.read(tempArr.data(), H5::PredType::NATIVE_DOUBLE);
-
-        // Get offset
-        std::vector<int> offset(3,1);
-        H5::Attribute offsetAttr = file[rank].openAttribute("offset");
-        offsetAttr.read(H5::PredType::NATIVE_INT, offset.data());
-
-        // Get dims_local
-        std::vector<int> dimsLocal(3,1);
-        H5::Attribute dimsLocalAttr = file[rank].openAttribute("dims_local");
-        dimsLocalAttr.read(H5::PredType::NATIVE_INT, dimsLocal.data());
-
-        // Now we add the data to the larger vector
-        size_t localIndex = 0;
-        for (size_t i = offset[0]; i < offset[0] + dimsLocal[0]; i++)
-        {
-            for (size_t j = offset[1]; j < offset[1] + dimsLocal[1]; j++)
-            {
-                for (size_t k = offset[2]; k < offset[2] + dimsLocal[2]; k++)
-                {
-                    // Compute the location to put the next element
-                    size_t overallIndex = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k;
-
-                    // Perform copy
-                    testData[overallIndex] = tempArr[localIndex];
-
-                    // Increment local index
-                    localIndex++;
-                }
-            }
-        }
+  // Determine the total number of particles
+  if (_testTotalNumParticles == 0) {
+    for (auto const &file : _testParticlesFileVec) {
+      // Open the dataset
+      H5::DataSet const dataSet = file.openDataSet(dataSetName);
+
+      // Determine dataset size/shape and check that it's correct
+      H5::DataSpace dataSpace = dataSet.getSpace();
+
+      // Get the number of elements and increase the total count
+      size_t localNumParticles = dataSpace.getSimpleExtentNpoints();
+      _testTotalNumParticles += localNumParticles;
     }
+  }
+
+  // Allocate the vectors
+  std::vector<double> unsortedTestData;
+  std::vector<double> testData(_testTotalNumParticles);
+
+  // Load in the data
+  for (size_t rank = 0; rank < numMpiRanks; rank++) {
+    // Open the dataset
+    H5::DataSet const testDataSet = _testParticlesFileVec[rank].openDataSet(dataSetName);
+
+    // Determine dataset size/shape and check that it's correct
+    H5::DataSpace const testDataSpace = testDataSet.getSpace();
+
+    size_t localNumParticles = testDataSpace.getSimpleExtentNpoints();
+    std::vector<double> tempVector(localNumParticles);
+
+    // Read in data
+    testDataSet.read(tempVector.data(), H5::PredType::NATIVE_DOUBLE);
+    unsortedTestData.insert(unsortedTestData.end(), tempVector.begin(), tempVector.end());
+  }
+
+  // Generate the sorting vector if it's not already generated
+  std::vector<size_t> tempSortedIndices;
+  if (dataSetName == "particle_IDs") {
+    tempSortedIndices.resize(_testTotalNumParticles);
+    std::iota(tempSortedIndices.begin(), tempSortedIndices.end(), 0);
+    std::sort(tempSortedIndices.begin(), tempSortedIndices.end(),
+              [&](size_t A, size_t B) -> bool { return unsortedTestData[A] < unsortedTestData[B]; });
+  }
+  std::vector<size_t> static const sortedIndices = tempSortedIndices;
+
+  // Sort the vector
+  for (size_t i = 0; i < _testTotalNumParticles; i++) {
+    testData.at(i) = unsortedTestData.at(sortedIndices.at(i));
+  }
+
+  // Return the entire dataset fully concatenated and sorted
+  return testData;
+}
+// =============================================================================
 
-    // Return the entire, concatenated, dataset
-    return testData;
+// =============================================================================
+std::vector<double> system_test::SystemTestRunner::_loadFiducialFieldData(std::string const &dataSetName)
+{
+  if (_fiducialFileExists and (_fiducialDataSets.find(dataSetName) == _fiducialDataSets.end())) {
+    // Open the dataset
+    H5::DataSet const fiducialDataSet = _fiducialFile.openDataSet(dataSetName);
+
+    // Determine dataset size/shape and check that it's correct
+    H5::DataSpace fiducialDataSpace = fiducialDataSet.getSpace();
+
+    std::vector<hsize_t> fidDims{1, 1, 1};
+    fiducialDataSpace.getSimpleExtentDims(fidDims.data());
+
+    // Allocate vectors, Note that I'm casting everything to double. Some
+    // of the vectors are ints in the HDF5 file and if the casting
+    // becomes an issue we can fix it later
+    std::vector<double> fiducialData(fidDims[0] * fidDims[1] * fidDims[2]);
+
+    // Read in data
+    fiducialDataSet.read(fiducialData.data(), H5::PredType::NATIVE_DOUBLE);
+    return fiducialData;
+  } else {
+    return _fiducialDataSets[dataSetName];
+  }
 }
 // =============================================================================
 
 // =============================================================================
-std::vector<double> systemTest::SystemTestRunner::_loadTestParticleData(
-        std::string const &dataSetName)
+std::vector<double> system_test::SystemTestRunner::_loadFiducialParticleData(std::string const &dataSetName)
 {
+  if (_fiducialFileExists) {
     // Determine the total number of particles
-    if (_testTotalNumParticles == 0)
-    {
-        for (auto file: _testParticlesFileVec)
-        {
-            // Open the dataset
-            H5::DataSet const dataSet = file.openDataSet(dataSetName);
-
-            // Determine dataset size/shape and check that it's correct
-            H5::DataSpace dataSpace = dataSet.getSpace();
-
-            // Get the number of elements and increase the total count
-            size_t localNumParticles = dataSpace.getSimpleExtentNpoints();
-            _testTotalNumParticles += localNumParticles;
-        }
+    if (_fiducialTotalNumParticles == 0) {
+      // Open the dataset
+      H5::DataSet const dataSet = _fiducialFile.openDataSet(dataSetName);
+
+      // Determine dataset size/shape and check that it's correct
+      H5::DataSpace dataSpace = dataSet.getSpace();
+
+      // Get the number of elements and increase the total count
+      size_t localNumParticles = dataSpace.getSimpleExtentNpoints();
+      _fiducialTotalNumParticles += localNumParticles;
     }
 
     // Allocate the vectors
-    std::vector<double> unsortedTestData;
-    std::vector<double> testData(_testTotalNumParticles);
+    std::vector<double> unsortedFiducialData(_fiducialTotalNumParticles);
+    std::vector<double> fiducialData(_fiducialTotalNumParticles);
 
     // Load in the data
-    for (size_t rank = 0; rank < numMpiRanks; rank++)
-    {
-        // Open the dataset
-        H5::DataSet const testDataSet = _testParticlesFileVec[rank].openDataSet(dataSetName);
-
-        // Determine dataset size/shape and check that it's correct
-        H5::DataSpace const testDataSpace = testDataSet.getSpace();
-
-        size_t localNumParticles = testDataSpace.getSimpleExtentNpoints();
-        std::vector<double> tempVector(localNumParticles);
-
-        // Read in data
-        testDataSet.read(tempVector.data(),
-                         H5::PredType::NATIVE_DOUBLE);
-        unsortedTestData.insert(unsortedTestData.end(),
-                                tempVector.begin(),
-                                tempVector.end() );
-    }
+    // Open the dataset
+    H5::DataSet const fiducialDataSet = _fiducialFile.openDataSet(dataSetName);
+
+    // Determine dataset size/shape and check that it's correct
+    H5::DataSpace const testDataSpace = fiducialDataSet.getSpace();
+
+    size_t localNumParticles = testDataSpace.getSimpleExtentNpoints();
+
+    // Read in data
+    fiducialDataSet.read(unsortedFiducialData.data(), H5::PredType::NATIVE_DOUBLE);
 
     // Generate the sorting vector if it's not already generated
     std::vector<size_t> tempSortedIndices;
-    if (dataSetName == "particle_IDs")
-    {
-        tempSortedIndices.resize(_testTotalNumParticles);
-        std::iota(tempSortedIndices.begin(), tempSortedIndices.end(), 0);
-        std::sort(tempSortedIndices.begin(), tempSortedIndices.end(),
-                [&](size_t A, size_t B) -> bool {
-                        return unsortedTestData[A] < unsortedTestData[B];
-                    });
+    if (dataSetName == "particle_IDs") {
+      tempSortedIndices.resize(_fiducialTotalNumParticles);
+      std::iota(tempSortedIndices.begin(), tempSortedIndices.end(), 0);
+      std::sort(tempSortedIndices.begin(), tempSortedIndices.end(),
+                [&](size_t A, size_t B) -> bool { return unsortedFiducialData.at(A) < unsortedFiducialData.at(B); });
     }
-    std::vector<size_t> static const sortedIndices = tempSortedIndices;
+    std::vector<size_t> const static sortedIndices = tempSortedIndices;
 
     // Sort the vector
-    for (size_t i = 0; i < _testTotalNumParticles; i++)
-    {
-        testData.at(i) = unsortedTestData.at(sortedIndices.at(i));
+    for (size_t i = 0; i < _fiducialTotalNumParticles; i++) {
+      fiducialData.at(i) = unsortedFiducialData.at(sortedIndices.at(i));
     }
 
     // Return the entire dataset fully concatenated and sorted
-    return testData;
+    return fiducialData;
+  } else {
+    return _fiducialDataSets[dataSetName];
+  }
 }
 // =============================================================================
 
 // =============================================================================
-std::vector<double> systemTest::SystemTestRunner::_loadFiducialFieldData(
-    std::string const &dataSetName)
+std::vector<std::string> system_test::SystemTestRunner::_findDataSetNames(H5::H5File const &inputFile)
 {
-    if (_fiducialFileExists)
-    {
-        // Open the dataset
-        H5::DataSet const fiducialDataSet = _fiducialFile.openDataSet(dataSetName);
-
-        // Determine dataset size/shape and check that it's correct
-        H5::DataSpace fiducialDataSpace = fiducialDataSet.getSpace();
-
-        std::vector<hsize_t> fidDims{1,1,1};
-        fiducialDataSpace.getSimpleExtentDims(fidDims.data());
+  std::vector<std::string> outputVector;
 
-        // Allocate vectors, Note that I'm casting everything to double. Some
-        // of the vectors are ints in the HDF5 file and if the casting
-        // becomes an issue we can fix it later
-        std::vector<double> fiducialData(fidDims[0] * fidDims[1] * fidDims[2]);
-
-        // Read in data
-        fiducialDataSet.read(fiducialData.data(), H5::PredType::NATIVE_DOUBLE);
-        return fiducialData;
-    }
-    else
-    {
-        return _fiducialDataSets[dataSetName];
-    }
-}
-// =============================================================================
-
-// =============================================================================
-std::vector<double> systemTest::SystemTestRunner::_loadFiducialParticleData(
-        std::string const &dataSetName)
-{
-    if (_fiducialFileExists)
-    {
-        // Determine the total number of particles
-        if (_fiducialTotalNumParticles == 0)
-        {
-            // Open the dataset
-            H5::DataSet const dataSet = _fiducialFile.openDataSet(dataSetName);
-
-            // Determine dataset size/shape and check that it's correct
-            H5::DataSpace dataSpace = dataSet.getSpace();
-
-            // Get the number of elements and increase the total count
-            size_t localNumParticles = dataSpace.getSimpleExtentNpoints();
-            _fiducialTotalNumParticles += localNumParticles;
-        }
-
-        // Allocate the vectors
-        std::vector<double> unsortedFiducialData(_fiducialTotalNumParticles);
-        std::vector<double> fiducialData(_fiducialTotalNumParticles);
-
-        // Load in the data
-        // Open the dataset
-        H5::DataSet const fiducialDataSet = _fiducialFile.openDataSet(dataSetName);
-
-        // Determine dataset size/shape and check that it's correct
-        H5::DataSpace const testDataSpace = fiducialDataSet.getSpace();
-
-        size_t localNumParticles = testDataSpace.getSimpleExtentNpoints();
-
-        // Read in data
-        fiducialDataSet.read(unsortedFiducialData.data(),
-                            H5::PredType::NATIVE_DOUBLE);
-
-        // Generate the sorting vector if it's not already generated
-        std::vector<size_t> tempSortedIndices;
-        if (dataSetName == "particle_IDs")
-        {
-            tempSortedIndices.resize(_fiducialTotalNumParticles);
-            std::iota(tempSortedIndices.begin(), tempSortedIndices.end(), 0);
-            std::sort(tempSortedIndices.begin(), tempSortedIndices.end(),
-                    [&](size_t A, size_t B) -> bool {
-                            return unsortedFiducialData.at(A) < unsortedFiducialData.at(B);
-                        });
-        }
-        std::vector<size_t> const static sortedIndices = tempSortedIndices;
-
-        // Sort the vector
-        for (size_t i = 0; i < _fiducialTotalNumParticles; i++)
-        {
-            fiducialData.at(i) = unsortedFiducialData.at(sortedIndices.at(i));
-        }
-
-        // Return the entire dataset fully concatenated and sorted
-        return fiducialData;
-    }
-    else
-    {
-        return _fiducialDataSets[dataSetName];
-    }
-}
-// =============================================================================
-
-// =============================================================================
-std::vector<std::string> systemTest::SystemTestRunner::_findDataSetNames(
-                                                    H5::H5File const &inputFile)
-{
-    std::vector<std::string> outputVector;
-
-    for (size_t dataSetID = 0;
-         dataSetID < inputFile.getNumObjs();
-         dataSetID++)
-    {
-        outputVector.push_back(inputFile.getObjnameByIdx(dataSetID));
-    }
-    return outputVector;
+  for (size_t dataSetID = 0; dataSetID < inputFile.getNumObjs(); dataSetID++) {
+    outputVector.push_back(inputFile.getObjnameByIdx(dataSetID));
+  }
+  return outputVector;
 };
 // =============================================================================
diff --git a/src/system_tests/system_tester.h b/src/system_tests/system_tester.h
index 6d5aa1925..c0612806e 100644
--- a/src/system_tests/system_tester.h
+++ b/src/system_tests/system_tester.h
@@ -9,10 +9,10 @@
 #pragma once
 
 // STL includes
-#include <string>
-#include <vector>
 #include <memory>
+#include <string>
 #include <unordered_map>
+#include <vector>
 
 // External Libraries and Headers
 #include <H5Cpp.h>
@@ -21,355 +21,358 @@
  * \brief This namespace contains one class, SystemTestRunner, whose
  * purpose is to (as you might expect) run system tests.
  */
-namespace systemTest
+namespace system_test
 {
-    /*!
-     * \brief Runs a system test using the full test name to determine all
-     * paths.
-     *
-     * \details By default this class uses the full name of your test, i.e. the test
-     * suite name plus the test name, along with some global variables to
-     * determine the paths to all the input files. The global variables are all
-     * set in main_tests.cpp and are the path to the Cholla directory, the make
-     * type being used, and the machine being run on. If the main function does
-     * get those it will throw an error so that error checking is not done here.
-     *
-     * To run a system test simply name the test according to convetion and put
-     * the input file in the `cholla/src/system_tests/input_files` directory and
-     * the data file in the `cholla/src/system_tests/fiducial_data` directory.
-     * Then name the files `testSuiteName_testCaseName` with the `.txt` or `.h5`
-     * extension respectively. If this class can't find the files it will
-     * throw an error with the path it searched. All the output files from the
-     * test are deposited in `cholla/bin/testSuiteName_testCaseName`
-     *
-     * More advanced functionality is provided with a series of member functions
-     * that allow you to programmatically generate the fiducial HDF5 file,
-     * choose which datasets to compare, whether or not to compare the number of
-     * time steps, etc.
-     *
-     */
-    class SystemTestRunner;
-} // namespace systemTest
-
-class systemTest::SystemTestRunner
+/*!
+ * \brief Runs a system test using the full test name to determine all
+ * paths.
+ *
+ * \details By default this class uses the full name of your test, i.e. the test
+ * suite name plus the test name, along with some global variables to
+ * determine the paths to all the input files. The global variables are all
+ * set in main_tests.cpp and are the path to the Cholla directory, the make
+ * type being used, and the machine being run on. If the main function does
+ * get those it will throw an error so that error checking is not done here.
+ *
+ * To run a system test simply name the test according to convetion and put
+ * the input file in the `cholla/src/system_tests/input_files` directory and
+ * the data file in the `cholla/src/system_tests/fiducial_data` directory.
+ * Then name the files `testSuiteName_testCaseName` with the `.txt` or `.h5`
+ * extension respectively. If this class can't find the files it will
+ * throw an error with the path it searched. All the output files from the
+ * test are deposited in `cholla/bin/testSuiteName_testCaseName`
+ *
+ * More advanced functionality is provided with a series of member functions
+ * that allow you to programmatically generate the fiducial HDF5 file,
+ * choose which datasets to compare, whether or not to compare the number of
+ * time steps, etc.
+ *
+ */
+class SystemTestRunner;
+}  // namespace system_test
+
+class system_test::SystemTestRunner
 {
-public:
-    /// The number of MPI ranks, defaults to 1
-    size_t numMpiRanks = 1;
-
-    /*!
-     * \brief Set the parameters that Cholla launches with, potentially entirely
-     * replacing the need for a settings file. A string of the launch parameters
-     * that will override the values in the settings file (if given). Any of
-     * Cholla's standard launch paramters work except `outdir` as that is
-     * reserved for usage in the systemTest::SystemTestRunner.runTest() method
-     */
-    std::string chollaLaunchParams;
-
-    /*!
-     * \brief Run the system test that has been set up
-     *
-     */
-    void runTest();
-
-    void launchCholla();
-
-    void openHydroTestData();
-    /*!
-     * \brief Get the Cholla Path object
-     *
-     * \return std::string The path to the Cholla executable
-     */
-    std::string getChollaPath(){return _chollaPath;};
-
-    /*!
-     * \brief Get the Cholla Settings File Path object
-     *
-     * \return std::string The full filename/path to the settings file used to
-     * initialize Cholla
-     */
-    std::string getChollaSettingsFilePath(){return _chollaSettingsPath;};
-
-    /*!
-     * \brief Get the Output Directory object
-     *
-     * \return std::string The path to the directory where all the output is
-     * stored
-     */
-    std::string getOutputDirectory(){return _outputDirectory;};
-
-    /*!
-     * \brief Get the Console Output Path object
-     *
-     * \return std::string The full filename/path to the file where all the
-     * console output is stored
-     */
-    std::string getConsoleOutputPath(){return _consoleOutputPath;};
-
-    /*!
-     * \brief Get the Fiducial File object
-     *
-     * \return H5::H5File
-     */
-    H5::H5File getFiducialFile(){return _fiducialFile;};
-
-    /*!
-     * \brief Get the Test File object
-     *
-     * \param index The MPI rank of the file you want to return. Defaults to 0
-     * \return H5::H5File
-     */
-    H5::H5File getTestFile(size_t const &i = 0){return _testHydroFieldsFileVec[i];};
-
-    /*!
-     * \brief Get the vector of datasets that will be tested
-     *
-     * \return std::vector<std::string>
-     */
-    std::vector<std::string> getDataSetsToTest(){return _fiducialDataSetNames;};
-
-    /*!
-     * \brief Choose which datasets to test. By default it tests all the
-     * datasets in the fiducial data. A warning will be thrown if not all the
-     * datasets are being tested. Note that any call to this function will
-     * overwrite the default values
-     *
-     * \param[in] dataSetNames A std::vector of std::strings where each entry is
-     * a dataset name. Note that it is case sensitive
-     */
-    void setDataSetsToTest(std::vector<std::string> const &dataSetNames)
-        {_fiducialDataSetNames = dataSetNames;};
-
-    /*!
-     * \brief Set the Compare Num Time Steps object
-     *
-     * \param[in] compare Defaults to `true`. If false then the number of timesteps
-     * is not compared.
-     */
-    void setCompareNumTimeSteps(bool const &compare)
-    {_compareNumTimeSteps = compare;};
-
-    /*!
-     * \brief Set or add a fiducial dataset
-     *
-     * \param[in] fieldName The name of the field to be added
-     * \param[in] dataArr The std::vector for the data vector to be added as
-     * a data set
-     */
-    void setFiducialData(std::string const &fieldName,
-                         std::vector<double> const &dataVec);
-
-    /*!
-     * \brief Set the Fiducial Num Time Steps object
-     *
-     * \param numTimeSteps The number of time steps in the fiducial data
-     */
-    void setFiducialNumTimeSteps(int const &numTimeSteps)
-        {_numFiducialTimeSteps = numTimeSteps;};
-
-    /*!
-     * \brief Generate an vector of the specified size populated by the specified
-     * value.
-     *
-     * \param[in] value The value to populate the vector with
-     * \param[in] nx (optional) The size of the field in the x-direction.
-     * Defaults to 1
-     * \param[in] ny (optional) The size of the field in the y-direction.
-     * Defaults to 1
-     * \param[in] nz (optional) The size of the field in the z-direction.
-     * Defaults to 1
-     * \return std::vector<double> A 1-dimensional std::vector of the required
-     * size containing the data.
-     */
-    std::vector<double> generateConstantData(double const &value,
-                                             size_t const &nx=1,
-                                             size_t const &ny=1,
-                                             size_t const &nz=1);
-
-    /*!
-     * \brief Load the test data for physical fields from the HDF5 file(s). If
-     * there is more than one HDF5 file then it concatenates the contents into a
-     * single vector. Particle data is handeled with _loadTestParticleData
-     *
-     * \param[in] dataSetName The name of the dataset to get
-     * \param[out] testDims An vector with the length of each dimension in it
-     * \return std::vector<double> A vector containing the data
-     */
-    std::vector<double> loadTestFieldData(std::string dataSetName,
-					  std::vector<size_t> &testDims);
-
-    /*!
-     * \brief Generate a std::vector of the specified size populated by a sine
-     * wave. The equation used to generate the wave is:
-     *
-     * wave = offset + amplitude * sin(kx*xIndex + ky*yIndex + kz*zIndex + phase)
-     *
-     * \param[in] offset Flat offset from zero
-     * \param[in] amplitude Amplitude of the wave
-     * \param[in] kx The x component of the wave vector in pixel units
-     * \param[in] ky The y component of the wave vector in pixel units
-     * \param[in] kz The z component of the wave vector in pixel units
-     * \param[in] phase Phase of the sine wave
-     * \param[in] nx (optional) The size of the field in the x-direction.
-     * Defaults to 1
-     * \param[in] ny (optional) The size of the field in the y-direction.
-     * Defaults to 1
-     * \param[in] nz (optional) The size of the field in the z-direction.
-     * Defaults to 1
-     * \return std::vector<double> A 1-dimensional std::vector of the required
-     * size containing the data.
-     */
-    std::vector<double> generateSineData(double const &offset,
-                                         double const &amplitude,
-                                         double const &kx,
-                                         double const &ky,
-                                         double const &kz,
-                                         double const &phase,
-                                         size_t const &nx=1,
-                                         size_t const &ny=1,
-                                         size_t const &nz=1);
-
-    // Constructor and Destructor
-    /*!
-     * \brief Construct a new System Test Runner object
-     *
-     * \param[in] particleData Is there particle data?
-     * \param[in] hydroData Is there hydro data?
-     * \param[in] useFiducialFile Indicate if you're using a HDF5 file or will
-     * generate your own. Defaults to `true`, i.e. using an HDF5 file. Set to
-     * `false` to generate your own
-     * \param[in] useSettingsFile Indicate if you're using a settings file. If
-     * `true` then the settings file is automatically found based on the naming
-     * convention. If false then the user MUST provide all the required settings
-     * with the SystemTestRunner::setChollaLaunchParams method
-     */
-    SystemTestRunner(bool const &particleData=false,
-                     bool const &hydroData=true,
-                     bool const &useFiducialFile=true,
-                     bool const &useSettingsFile=true);
-    ~SystemTestRunner();
-
-private:
-    /// The fiducial dat file
-    H5::H5File _fiducialFile;
-    /// The test hydro field data files
-    std::vector<H5::H5File> _testHydroFieldsFileVec;
-    /// The test particle data files
-    std::vector<H5::H5File> _testParticlesFileVec;
-
-    /// The path to the Cholla executable
-    std::string _chollaPath;
-    /// The full name of the test with an underscore instead of a period. This
-    /// is the name of many of the input files, the output directory, etc
-    std::string _fullTestFileName;
-    /// The path to the Cholla settings file
-    std::string _chollaSettingsPath;
-    /// The path to the fiducial data file
-    std::string _fiducialFilePath;
-    /// The path to the output directory
-    std::string _outputDirectory;
-    /// The path and name of the console output file
-    std::string _consoleOutputPath;
-
-    /// A list of all the data set names in the fiducial data file
-    std::vector<std::string> _fiducialDataSetNames;
-    /// A list of all the data set names in the test data file
-    std::vector<std::string> _testDataSetNames;
-
-    /// The number of fiducial time steps
-    int _numFiducialTimeSteps;
-    /// Map of fiducial data sets if we're not using a fiducial file
-    std::unordered_map<std::string, std::vector<double>> _fiducialDataSets;
-
-    /// The test particle IDs
-    std::vector<double> _testParticleIDs;
-    /// The total number of particles in the test dataset
-    size_t _testTotalNumParticles=0;
-    /// The fiducial particle IDs
-    std::vector<double> _fiducialParticleIDs;
-    /// The total number of particles in the fiducial dataset
-    size_t _fiducialTotalNumParticles=0;
-
-    /// Flag to indicate if a fiducial HDF5 data file is being used or a
-    /// programmatically generated H5File object. `true` = use a file, `false` =
-    /// use generated H5File object
-    bool _fiducialFileExists = false;
-    /// Flag to choose whether or not to compare the number of time steps
-    bool _compareNumTimeSteps = true;
-
-    /// Flag to indicate whether or not there is hydro field data
-    /// If true then hydro data files are searched for and will be compared to
-    /// fiducial values. If false then it is assumed that the test produces no
-    /// hydro field data
-    bool _hydroDataExists = true;
-    /// Flag to indicate whether or not there is particle data
-    /// If true then particle data files are searched for and will be compared
-    /// to fiducial values. If false then it is assumed that the test produces
-    /// no particle data
-    bool _particleDataExists = false;
-
-
-    /*!
-    * \brief Move a file. Throws an exception if the file does not exist.
-    * or if the move was unsuccessful
-    *
-    * \param[in] sourcePath The path the the file to be moved
-    * \param[in] destinationDirectory The path to the director the file should
-    * be moved to
-    */
-    void _safeMove(std::string const &sourcePath,
-                   std::string const &destinationDirectory);
-
-    /*!
-    * \brief Checks if the given file exists. Throws an exception if the
-    * file does not exist.
-    *
-    * \param[in] filePath The path to the file to check for
-    */
-    void _checkFileExists(std::string const &filePath);
-
-    /*!
-     * \brief Using GTest assertions to check if the fiducial and test data have
-     * the same number of time steps
-     *
-     */
-    void _checkNumTimeSteps();
-
-    /*!
-     * \brief Load the test data for particles from the HDF5 file(s). If
-     * there is more than one HDF5 file then it concatenates the contents into a
-     * single vector. Field data is handeled with _loadTestFieldData
-     *
-     * \param[in] dataSetName The name of the dataset to get
-     * \return std::vector<double> A vector containing the data
-     */
-    std::vector<double> _loadTestParticleData(std::string const &dataSetName);
-
-    /*!
-     * \brief Load the test data for physical fields from the HDF5 file or
-     * returns the user set vector.
-     * Particle data is handeled with _loadFiducialParticleData.
-     *
-     * \param[in] dataSetName The name of the dataset to get
-     * \return std::vector<double> A vector with the contents of the data set
-     */
-    std::vector<double> _loadFiducialFieldData(std::string const &dataSetName);
-
-    /*!
-     * \brief Load the fiducial data for particles from the HDF5 file or return
-     * the user set vector. Field data is handeled with _loadFiducialFieldData
-     *
-     * \param[in] dataSetName The name of the dataset to get
-     * \return std::vector<double> A vector containing the data
-     */
-    std::vector<double> _loadFiducialParticleData(std::string const &dataSetName);
-
-
-    /*!
-     * \brief Return a vector of all the dataset names in the given HDF5 file
-     *
-     * \param[in] inputFile The HDF5 file to find names in
-     * \return std::vector<std::string>
-     */
-    std::vector<std::string> _findDataSetNames(H5::H5File const &inputFile);
-}; // End of class systemTest::SystemTestRunner
+ public:
+  /// The number of MPI ranks, defaults to 1
+  size_t numMpiRanks = 1;
+
+  /*!
+   * \brief Set the parameters that Cholla launches with, potentially entirely
+   * replacing the need for a settings file. A string of the launch parameters
+   * that will override the values in the settings file (if given). Any of
+   * Cholla's standard launch paramters work except `outdir` as that is
+   * reserved for usage in the system_test::SystemTestRunner.runTest() method
+   */
+  std::string chollaLaunchParams;
+
+  /*!
+   * \brief Run the system test that has been set up
+   *
+   */
+  void runTest(bool const &compute_L2_norm_only = false, double const &maxAllowedL1Error = 0.0,
+               double const &maxAllowedError = 0.0);
+
+  /*!
+   * \brief Compute the L1 error for each field compared to the initial
+   * conditions. Doesn't work with particle data
+   *
+   * \param[in] maxAllowedL1Error The maximum allowed L1 error for this test
+   * \param[in] maxAllowedError The maximum allowed for any value in the test
+   *
+   */
+  void runL1ErrorTest(double const &maxAllowedL1Error, double const &maxAllowedError = 1E-7);
+
+  /*!
+   * \brief Launch Cholla as it is set up
+   *
+   */
+  void launchCholla();
+
+  void openHydroTestData();
+
+  /*!
+   * \brief Get the Cholla Path object
+   *
+   * \return std::string The path to the Cholla executable
+   */
+  std::string getChollaPath() { return _chollaPath; };
+
+  /*!
+   * \brief Get the Cholla Settings File Path object
+   *
+   * \return std::string The full filename/path to the settings file used to
+   * initialize Cholla
+   */
+  std::string getChollaSettingsFilePath() { return _chollaSettingsPath; };
+
+  /*!
+   * \brief Get the L2Norm
+   *
+   * \return double The L2Norm of the last run test
+   */
+  double getL2Norm() { return L2Norm_; };
+
+  /*!
+   * \brief Get the Output Directory object
+   *
+   * \return std::string The path to the directory where all the output is
+   * stored
+   */
+  std::string getOutputDirectory() { return _outputDirectory; };
+
+  /*!
+   * \brief Get the Console Output Path object
+   *
+   * \return std::string The full filename/path to the file where all the
+   * console output is stored
+   */
+  std::string getConsoleOutputPath() { return _consoleOutputPath; };
+
+  /*!
+   * \brief Get the Fiducial File object
+   *
+   * \return H5::H5File
+   */
+  H5::H5File getFiducialFile() { return _fiducialFile; };
+
+  /*!
+   * \brief Get the Test File object
+   *
+   * \param index The MPI rank of the file you want to return. Defaults to 0
+   * \return H5::H5File
+   */
+  H5::H5File getTestFile(size_t const &i = 0) { return _testHydroFieldsFileVec[i]; };
+
+  /*!
+   * \brief Get the vector of datasets that will be tested
+   *
+   * \return std::vector<std::string>
+   */
+  std::vector<std::string> getDataSetsToTest() { return _fiducialDataSetNames; };
+
+  /*!
+   * \brief Set the Fixed Epsilon value
+   *
+   * \param[in] newVal The new value of fixed epsilon
+   */
+  void setFixedEpsilon(double const &newVal) { _fixedEpsilon = newVal; };
+
+  /*!
+   * \brief Choose which datasets to test. By default it tests all the
+   * datasets in the fiducial data. A warning will be thrown if not all the
+   * datasets are being tested. Note that any call to this function will
+   * overwrite the default values
+   *
+   * \param[in] dataSetNames A std::vector of std::strings where each entry is
+   * a dataset name. Note that it is case sensitive
+   */
+  void setDataSetsToTest(std::vector<std::string> const &dataSetNames) { _fiducialDataSetNames = dataSetNames; };
+
+  /*!
+   * \brief Set the Compare Num Time Steps object
+   *
+   * \param[in] compare Defaults to `true`. If false then the number of
+   * timesteps is not compared.
+   */
+  void setCompareNumTimeSteps(bool const &compare) { _compareNumTimeSteps = compare; };
+
+  /*!
+   * \brief Set or add a fiducial dataset
+   *
+   * \param[in] fieldName The name of the field to be added
+   * \param[in] dataArr The std::vector for the data vector to be added as
+   * a data set
+   */
+  void setFiducialData(std::string const &fieldName, std::vector<double> const &dataVec);
+
+  /*!
+   * \brief Set the Fiducial Num Time Steps object
+   *
+   * \param numTimeSteps The number of time steps in the fiducial data
+   */
+  void setFiducialNumTimeSteps(int const &numTimeSteps) { _numFiducialTimeSteps = numTimeSteps; };
+
+  /*!
+   * \brief Generate an vector of the specified size populated by the specified
+   * value.
+   *
+   * \param[in] value The value to populate the vector with
+   * \param[in] nx (optional) The size of the field in the x-direction.
+   * Defaults to 1
+   * \param[in] ny (optional) The size of the field in the y-direction.
+   * Defaults to 1
+   * \param[in] nz (optional) The size of the field in the z-direction.
+   * Defaults to 1
+   * \return std::vector<double> A 1-dimensional std::vector of the required
+   * size containing the data.
+   */
+  std::vector<double> generateConstantData(double const &value, size_t const &nx = 1, size_t const &ny = 1,
+                                           size_t const &nz = 1);
+
+  /*!
+   * \brief Load the test data for physical fields from the HDF5 file(s). If
+   * there is more than one HDF5 file then it concatenates the contents into a
+   * single vector. Particle data is handeled with _loadTestParticleData
+   *
+   * \param[in] dataSetName The name of the dataset to get
+   * \param[out] testDims An vector with the length of each dimension in it
+   * \param[in] file (optional) The vector of HDF5 files to load
+   * \return std::vector<double> A vector containing the data
+   */
+  std::vector<double> loadTestFieldData(std::string dataSetName, std::vector<size_t> &testDims,
+                                        std::vector<H5::H5File> file = {});
+
+  /*!
+   * \brief Generate a std::vector of the specified size populated by a sine
+   * wave. The equation used to generate the wave is:
+   *
+   * wave = offset + amplitude * sin(kx*xIndex + ky*yIndex + kz*zIndex + phase)
+   *
+   * \param[in] offset Flat offset from zero
+   * \param[in] amplitude Amplitude of the wave
+   * \param[in] kx The x component of the wave vector in pixel units
+   * \param[in] ky The y component of the wave vector in pixel units
+   * \param[in] kz The z component of the wave vector in pixel units
+   * \param[in] phase Phase of the sine wave
+   * \param[in] nx (optional) The size of the field in the x-direction.
+   * Defaults to 1
+   * \param[in] ny (optional) The size of the field in the y-direction.
+   * Defaults to 1
+   * \param[in] nz (optional) The size of the field in the z-direction.
+   * Defaults to 1
+   * \return std::vector<double> A 1-dimensional std::vector of the required
+   * size containing the data.
+   */
+  std::vector<double> generateSineData(double const &offset, double const &amplitude, double const &kx,
+                                       double const &ky, double const &kz, double const &phase, size_t const &nx = 1,
+                                       size_t const &ny = 1, size_t const &nz = 1);
+
+  // Constructor and Destructor
+  /*!
+   * \brief Construct a new System Test Runner object
+   *
+   * \param[in] particleData Is there particle data?
+   * \param[in] hydroData Is there hydro data?
+   * \param[in] useFiducialFile Indicate if you're using a HDF5 file or will
+   * generate your own. Defaults to `true`, i.e. using an HDF5 file. Set to
+   * `false` to generate your own
+   * \param[in] useSettingsFile Indicate if you're using a settings file. If
+   * `true` then the settings file is automatically found based on the naming
+   * convention. If false then the user MUST provide all the required settings
+   * with the SystemTestRunner::chollaLaunchParams member variable
+   */
+  SystemTestRunner(bool const &particleData = false, bool const &hydroData = true, bool const &useFiducialFile = true,
+                   bool const &useSettingsFile = true);
+  ~SystemTestRunner();
+
+ private:
+  /// The fiducial dat file
+  H5::H5File _fiducialFile;
+  /// The test hydro field data files
+  std::vector<H5::H5File> _testHydroFieldsFileVec;
+  /// The test particle data files
+  std::vector<H5::H5File> _testParticlesFileVec;
+
+  /// The path to the Cholla executable
+  std::string _chollaPath;
+  /// The full name of the test with an underscore instead of a period. This
+  /// is the name of many of the input files, the output directory, etc
+  std::string _fullTestFileName;
+  /// The path to the Cholla settings file
+  std::string _chollaSettingsPath;
+  /// The path to the fiducial data file
+  std::string _fiducialFilePath;
+  /// The path to the output directory
+  std::string _outputDirectory;
+  /// The path and name of the console output file
+  std::string _consoleOutputPath;
+
+  /// A list of all the data set names in the fiducial data file
+  std::vector<std::string> _fiducialDataSetNames;
+  /// A list of all the data set names in the test data file
+  std::vector<std::string> _testDataSetNames;
+
+  /// The number of fiducial time steps
+  int _numFiducialTimeSteps;
+  /// Map of fiducial data sets if we're not using a fiducial file
+  std::unordered_map<std::string, std::vector<double>> _fiducialDataSets;
+
+  /// The test particle IDs
+  std::vector<double> _testParticleIDs;
+  /// The total number of particles in the test dataset
+  size_t _testTotalNumParticles = 0;
+  /// The fiducial particle IDs
+  std::vector<double> _fiducialParticleIDs;
+  /// The total number of particles in the fiducial dataset
+  size_t _fiducialTotalNumParticles = 0;
+
+  /// Fixed epsilon is changed from the default since AMD/Clang
+  /// appear to differ from NVIDIA/GCC/XL by roughly 1E-12
+  double _fixedEpsilon = 5.0E-12;
+
+  /// The L2 norm of the error vector
+  double L2Norm_;
+
+  /// Flag to indicate if a fiducial HDF5 data file is being used or a
+  /// programmatically generated H5File object. `true` = use a file, `false` =
+  /// use generated H5File object
+  bool _fiducialFileExists = false;
+  /// Flag to choose whether or not to compare the number of time steps
+  bool _compareNumTimeSteps = true;
+
+  /// Flag to indicate whether or not there is hydro field data
+  /// If true then hydro data files are searched for and will be compared to
+  /// fiducial values. If false then it is assumed that the test produces no
+  /// hydro field data
+  bool _hydroDataExists = true;
+  /// Flag to indicate whether or not there is particle data
+  /// If true then particle data files are searched for and will be compared
+  /// to fiducial values. If false then it is assumed that the test produces
+  /// no particle data
+  bool _particleDataExists = false;
+
+  /*!
+   * \brief Using GTest assertions to check if the fiducial and test data have
+   * the same number of time steps
+   *
+   */
+  void _checkNumTimeSteps();
+
+  /*!
+   * \brief Load the test data for particles from the HDF5 file(s). If
+   * there is more than one HDF5 file then it concatenates the contents into a
+   * single vector. Field data is handeled with _loadTestFieldData
+   *
+   * \param[in] dataSetName The name of the dataset to get
+   * \return std::vector<double> A vector containing the data
+   */
+  std::vector<double> _loadTestParticleData(std::string const &dataSetName);
+
+  /*!
+   * \brief Load the test data for physical fields from the HDF5 file or
+   * returns the user set vector.
+   * Particle data is handeled with _loadFiducialParticleData.
+   *
+   * \param[in] dataSetName The name of the dataset to get
+   * \return std::vector<double> A vector with the contents of the data set
+   */
+  std::vector<double> _loadFiducialFieldData(std::string const &dataSetName);
+
+  /*!
+   * \brief Load the fiducial data for particles from the HDF5 file or return
+   * the user set vector. Field data is handeled with _loadFiducialFieldData
+   *
+   * \param[in] dataSetName The name of the dataset to get
+   * \return std::vector<double> A vector containing the data
+   */
+  std::vector<double> _loadFiducialParticleData(std::string const &dataSetName);
+
+  /*!
+   * \brief Return a vector of all the dataset names in the given HDF5 file
+   *
+   * \param[in] inputFile The HDF5 file to find names in
+   * \return std::vector<std::string>
+   */
+  std::vector<std::string> _findDataSetNames(H5::H5File const &inputFile);
+};  // End of class system_test::SystemTestRunner
diff --git a/src/utils/DeviceVector.h b/src/utils/DeviceVector.h
index 422f3d151..db10a09b4 100644
--- a/src/utils/DeviceVector.h
+++ b/src/utils/DeviceVector.h
@@ -1,5 +1,5 @@
 /*!
- * \file device_vector.h
+ * \file DeviceVector.h
  * \author Robert 'Bob' Caddy (rvc@pitt.edu)
  * \brief Contains the declartion and implementation of the DeviceVector
  * class. Note that since this is a templated class the implementation must be
@@ -10,10 +10,11 @@
 #pragma once
 
 // STL Includes
-#include <vector>
-#include <string>
-#include <stdexcept>
 #include <algorithm>
+#include <stdexcept>
+#include <string>
+#include <type_traits>
+#include <vector>
 
 // External Includes
 
@@ -27,305 +28,305 @@
 // =============================================================================
 namespace cuda_utilities
 {
-    /*!
-     * \brief A templatized class to encapsulate a device global memory pointer
-     * in a std::vector like interface complete with most of the usual methods.
-     * This class is intended to be used only in host code and does not work
-     * device side; Passing the pointer to a kernel can be done with the
-     * `data()` method. This class works for any device side pointer, scalar or
-     * array valued.
-     *
-     * \tparam T Any serialized type where `sizeof(T)` returns correct results
-     * should work but non-primitive types have not been tested.
-     */
-    template <typename T>
-    class DeviceVector
-    {
-    public:
-        /*!
-         * \brief Construct a new Device Vector object by calling the
-         * `_allocate` private method
-         *
-         * \param[in] size The number of elements desired in the array. Can be
-         * any positive integer.
-         */
-        DeviceVector(size_t const size) {_allocate(size);}
-
-        /*!
-         * \brief Destroy the Device Vector object by calling the `_deAllocate`
-         * private method
-         *
-         */
-        ~DeviceVector() {_deAllocate();}
-
-        /*!
-         * \brief Get the raw device pointer
-         *
-         * \return T* The pointer for the array in global memory
-         */
-        T* data() {return _ptr;}
-
-        /*!
-         * \brief Get the number of elements in the array.
-         *
-         * \return size_t The number of elements in the array
-         */
-        size_t size() {return _size;}
-
-        /*!
-         * \brief Overload the [] operator to return a value from device memory.
-         * This method performs a cudaMemcpy to copy the desired element to the
-         * host then returns it. Unlike the `at()` method this method does not
-         * perform bounds checking
-         *
-         * \param[in] index The index of the desired value
-         * \return T The value at dev_ptr[index]
-         */
-        T operator [] (size_t const &index);
-
-        /*!
-         * \brief Return a value from device memory. This method performs a
-         * cudaMemcpy to copy the desired element to the host then returns it.
-         * Unlike the `[]` overload this method perform bounds checking
-         *
-         * \param[in] index The index of the desired value
-         * \return T The value at dev_ptr[index]
-         */
-        T const at(size_t const index);
-
-        /*!
-        * \brief Assign a single value in the array. Should generally only be
-        * used when the pointer points to a scalar value. By default this
-        * writes `hostValue` to the 0th element of the array.
-        *
-        * \param[in] hostValue The value to write to the device array
-        * \param[in] index The location to write the value to, defaults to zero.
-        */
-        void assign(T const &hostValue, size_t const &index=0);
-
-        /*!
-         * \brief Resize the device container to contain `newSize` elements. If
-         * `newSize` is greater than the current size then all the values are
-         * kept and the rest of the array is default initialized. If `newSize`
-         * is smaller than the current size then the array is truncated and
-         * values at locations greater than `newSize` are lost. Keeping the
-         * values in the array requires that the new array be allocated, the
-         * values be copied, then the old array be freed; as such this method is
-         * quite slow and can use a large amount of memory. If you don't care
-         * about the values in the array then use the `reset` method
-         *
-         * \param[in] newSize The desired size of the array
-         */
-        void resize(size_t const newSize);
-
-        /*!
-         * \brief Reset the size of the array. This frees the old array and
-         * allocates a new one; all values in the array may be lost. The values
-         * in memory are not initialized and therefore the behaviour of the
-         * default values is undefined
-         *
-         * \param newSize
-         */
-        void reset(size_t const newSize);
-
-        /*!
-         * \brief Copy the first `arrSize` elements of `arrIn` to the device.
-         *
-         * \param[in] arrIn The pointer to the array to be copied to the device
-         * \param[in] arrSize The number of elements/size of the array to copy
-         * to the device
-         */
-        void cpyHostToDevice(const T * arrIn, size_t const &arrSize);
-
-        /*!
-         * \brief Copy the contents of a std::vector to the device
-         *
-         * \param[in] vecIn The array whose contents are to be copied
-         */
-        void cpyHostToDevice(std::vector<T> const &vecIn)
-            {cpyHostToDevice(vecIn.data(), vecIn.size());}
-
-        /*!
-         * \brief Copy the array from the device to a host array. Checks if the
-         * host array is large enough based on the `arrSize` parameter.
-         *
-         * \param[out] arrOut The pointer to the host array
-         * \param[in] arrSize The number of elements allocated in the host array
-         */
-        void cpyDeviceToHost(T * arrOut, size_t const &arrSize);
-
-        /*!
-         * \brief Copy the array from the device to a host std::vector. Checks
-         * if the host array is large enough.
-         *
-         * \param[out] vecOut The std::vector to copy the device array into
-         */
-        void cpyDeviceToHost(std::vector<T> &vecOut)
-            {cpyDeviceToHost(vecOut.data(), vecOut.size());}
-
-    private:
-        /// The size of the device array
-        size_t _size;
-
-        /// The pointer to the device array
-        T *_ptr=nullptr;
-
-        /*!
-         * \brief Allocate the device side array
-         *
-         * \param[in] size The size of the array to allocate
-         */
-        void _allocate(size_t const size)
-        {
-            _size=size;
-            CudaSafeCall(cudaMalloc(&_ptr, size*sizeof(T)));
-        }
-
-        /*!
-         * \brief Free the device side array
-         *
-         */
-        void _deAllocate(){CudaSafeCall(cudaFree(_ptr));}
-    };
-}  // End of cuda_utilities namespace
+/*!
+ * \brief A templatized class to encapsulate a device global memory pointer
+ * in a std::vector like interface complete with most of the usual methods.
+ * This class is intended to be used only in host code and does not work
+ * device side; Passing the pointer to a kernel can be done with the
+ * `data()` method. This class works for any device side pointer, scalar or
+ * array valued.
+ *
+ * \tparam T Any trivially copyable type where `sizeof(T)` returns correct
+ * results should work, but non-primitive types have not been tested.
+ */
+template <typename T>
+class DeviceVector
+{
+  static_assert(std::is_trivially_copyable_v<T>,
+                "DeviceVector can only be used with trivially_copyable types due to the internal "
+                "usage of functions like cudaMemcpy, cudaMemcpyPeer, cudaMemset");
+
+ public:
+  /*!
+   * \brief Construct a new Device Vector object by calling the
+   * `_allocate` private method
+   *
+   * \param[in] size The number of elements desired in the array. Can be
+   * any positive integer.
+   * \param[in] initialize (optional) If true then initialize the GPU
+   * memory to int(0)
+   */
+  DeviceVector(size_t const size, bool const initialize = false);
+
+  /*!
+   * \brief Destroy the Device Vector object by calling the `_deAllocate`
+   * private method
+   *
+   */
+  ~DeviceVector() { _deAllocate(); }
+
+  /* The following are deleted because they currently lead to invalid state.
+   * (But they can all easily be implemented in the future).
+   */
+  DeviceVector()                                           = delete;
+  DeviceVector(const DeviceVector<T> &)                    = delete;
+  DeviceVector(DeviceVector<T> &&)                         = delete;
+  DeviceVector<T> &operator=(const DeviceVector<T> &other) = delete;
+  DeviceVector<T> &operator=(DeviceVector<T> &&other)      = delete;
+
+  /*!
+   * \brief Get the raw device pointer
+   *
+   * \return T* The pointer for the array in global memory
+   */
+  T *data() { return _ptr; }
+
+  /*!
+   * \brief Get the number of elements in the array.
+   *
+   * \return size_t The number of elements in the array
+   */
+  size_t size() { return _size; }
+
+  /*!
+   * \brief Overload the [] operator to return a value from device memory.
+   * This method performs a cudaMemcpy to copy the desired element to the
+   * host then returns it. Unlike the `at()` method this method does not
+   * perform bounds checking
+   *
+   * \param[in] index The index of the desired value
+   * \return T The value at dev_ptr[index]
+   */
+  T operator[](size_t const &index);
+
+  /*!
+   * \brief Return a value from device memory. This method performs a
+   * cudaMemcpy to copy the desired element to the host then returns it.
+   * Unlike the `[]` overload this method perform bounds checking
+   *
+   * \param[in] index The index of the desired value
+   * \return T The value at dev_ptr[index]
+   */
+  T at(size_t const index);
+
+  /*!
+   * \brief Assign a single value in the array. Should generally only be
+   * used when the pointer points to a scalar value. By default this
+   * writes `hostValue` to the 0th element of the array.
+   *
+   * \param[in] hostValue The value to write to the device array
+   * \param[in] index The location to write the value to, defaults to zero.
+   */
+  void assign(T const &hostValue, size_t const &index = 0);
+
+  /*!
+   * \brief Resize the device container to contain `newSize` elements. If
+   * `newSize` is greater than the current size then all the values are
+   * kept and the rest of the array is default initialized. If `newSize`
+   * is smaller than the current size then the array is truncated and
+   * values at locations greater than `newSize` are lost. Keeping the
+   * values in the array requires that the new array be allocated, the
+   * values be copied, then the old array be freed; as such this method is
+   * quite slow and can use a large amount of memory. If you don't care
+   * about the values in the array then use the `reset` method
+   *
+   * \param[in] newSize The desired size of the array
+   */
+  void resize(size_t const newSize);
+
+  /*!
+   * \brief Reset the size of the array. This frees the old array and
+   * allocates a new one; all values in the array may be lost. The values
+   * in memory are not initialized and therefore the behaviour of the
+   * default values is undefined
+   *
+   * \param newSize
+   */
+  void reset(size_t const newSize);
+
+  /*!
+   * \brief Copy the first `arrSize` elements of `arrIn` to the device.
+   *
+   * \param[in] arrIn The pointer to the array to be copied to the device
+   * \param[in] arrSize The number of elements/size of the array to copy
+   * to the device
+   */
+  void cpyHostToDevice(const T *arrIn, size_t const &arrSize);
+
+  /*!
+   * \brief Copy the contents of a std::vector to the device
+   *
+   * \param[in] vecIn The array whose contents are to be copied
+   */
+  void cpyHostToDevice(std::vector<T> const &vecIn) { cpyHostToDevice(vecIn.data(), vecIn.size()); }
+
+  /*!
+   * \brief Copy the array from the device to a host array. Checks if the
+   * host array is large enough based on the `arrSize` parameter.
+   *
+   * \param[out] arrOut The pointer to the host array
+   * \param[in] arrSize The number of elements allocated in the host array
+   */
+  void cpyDeviceToHost(T *arrOut, size_t const &arrSize);
+
+  /*!
+   * \brief Copy the array from the device to a host std::vector. Checks
+   * if the host array is large enough.
+   *
+   * \param[out] vecOut The std::vector to copy the device array into
+   */
+  void cpyDeviceToHost(std::vector<T> &vecOut) { cpyDeviceToHost(vecOut.data(), vecOut.size()); }
+
+ private:
+  /// The size of the device array
+  size_t _size;
+
+  /// The pointer to the device array
+  T *_ptr = nullptr;
+
+  /*!
+   * \brief Allocate the device side array
+   *
+   * \param[in] size The size of the array to allocate
+   */
+  void _allocate(size_t const size)
+  {
+    _size = size;
+    GPU_Error_Check(cudaMalloc(&_ptr, _size * sizeof(T)));
+  }
+
+  /*!
+   * \brief Free the device side array
+   *
+   */
+  void _deAllocate() { GPU_Error_Check(cudaFree(_ptr)); }
+};
+}  // namespace cuda_utilities
 // =============================================================================
 // End declaration of DeviceVector class
 // =============================================================================
 
-
 // =============================================================================
 // Definition of DeviceVector class
 // =============================================================================
 namespace cuda_utilities
 {
+// =========================================================================
+// Public Methods
+// =========================================================================
 
-    // =========================================================================
-    // Public Methods
-    // =========================================================================
-
-    // =========================================================================
-    template <typename T>
-    void DeviceVector<T>::resize(size_t const newSize)
-    {
-        // Assign old array to a new pointer
-        T * oldDevPtr = _ptr;
-
-        // Determine how many elements to copy
-        size_t const count = std::min(_size, newSize) * sizeof(T);
-
-        // Allocate new array
-        _allocate(newSize);
-
-        // Copy the values from the old array to the new array
-        CudaSafeCall(cudaMemcpyPeer(_ptr, 0, oldDevPtr, 0, count));
-
-        // Free the old array
-        CudaSafeCall(cudaFree(oldDevPtr));
-    }
-    // =========================================================================
-
-    // =========================================================================
-    template <typename T>
-    void DeviceVector<T>::reset(size_t const newSize)
-    {
-        _deAllocate();
-        _allocate(newSize);
-    }
-    // =========================================================================
-
-    // =========================================================================
-    template <typename T>
-    T DeviceVector<T>::operator [] (size_t const &index)
-    {
-        T hostValue;
-        CudaSafeCall(cudaMemcpy(&hostValue,
-                                &(_ptr[index]),
-                                sizeof(T),
-                                cudaMemcpyDeviceToHost));
-        return hostValue;
-    }
-    // =========================================================================
-
-    // =========================================================================
-    template <typename T>
-    T const DeviceVector<T>::at(size_t const index)
-    {
-        if (index < _size)
-        {
-            // Use the overloaded [] operator to grab the value from GPU memory
-            // into host memory
-            return (*this)[index];
-        }
-        else
-        {
-            throw std::out_of_range("Warning: DeviceVector.at() detected an"
-                                    " out of bounds memory access. Tried to"
-                                    " access element "
-                                    + std::to_string(index)
-                                    + " of "
-                                    + std::to_string(_size));
-        }
-    }
-    // =========================================================================
-
-    // =========================================================================
-    template <typename T>
-    void DeviceVector<T>::assign(T const &hostValue, size_t const &index)
-    {
-        CudaSafeCall(cudaMemcpy(&(_ptr[index]),  // destination
-                                &hostValue,      // source
-                                sizeof(T),
-                                cudaMemcpyHostToDevice));
-    }
-    // =========================================================================
-
-    // =========================================================================
-    template <typename T>
-    void DeviceVector<T>::cpyHostToDevice(const T * arrIn, size_t const &arrSize)
-    {
-        if (arrSize <= _size)
-        {
-            CudaSafeCall(cudaMemcpy(_ptr,
-                                    arrIn,
-                                    arrSize*sizeof(T),
-                                    cudaMemcpyHostToDevice));
-        }
-        else
-        {
-            throw std::out_of_range("Warning: Couldn't copy array to device,"
-                                    " device array is too small. Host array"
-                                    " size="
-                                    + std::to_string(arrSize)
-                                    + ", device array size="
-                                    + std::to_string(arrSize));
-        }
-
-    }
-    // =========================================================================
-
-    // =========================================================================
-    template <typename T>
-    void DeviceVector<T>::cpyDeviceToHost(T * arrOut, size_t const &arrSize)
-    {
-        if (_size <= arrSize)
-        {
-            CudaSafeCall(cudaMemcpy(arrOut,
-                                    _ptr,
-                                    _size*sizeof(T),
-                                    cudaMemcpyDeviceToHost));
-        }
-        else
-        {
-            throw std::out_of_range("Warning: Couldn't copy array to host, "
-                                    "host array is too small. Host array "
-                                    "size="
-                                    + std::to_string(arrSize)
-                                    + ", device array size="
-                                    + std::to_string(arrSize));
-        }
-    }
-    // =========================================================================
-} // end namespace cuda_utilities
-// =============================================================================
-// End definition of DeviceVector class
-// =============================================================================
\ No newline at end of file
+// =========================================================================
+template <typename T>
+DeviceVector<T>::DeviceVector(size_t const size, bool const initialize)
+{
+  _allocate(size);
+
+  if (initialize) {
+    GPU_Error_Check(cudaMemset(_ptr, 0, _size * sizeof(T)));
+  }
+}
+// =========================================================================
+
+// =========================================================================
+template <typename T>
+void DeviceVector<T>::resize(size_t const newSize)
+{
+  // Assign old array to a new pointer
+  T *oldDevPtr = _ptr;
+
+  // Determine how many elements to copy
+  size_t const count = std::min(_size, newSize) * sizeof(T);
+
+  // Allocate new array
+  _allocate(newSize);
+
+  // Copy the values from the old array to the new array
+  GPU_Error_Check(cudaMemcpyPeer(_ptr, 0, oldDevPtr, 0, count));
+
+  // Free the old array
+  GPU_Error_Check(cudaFree(oldDevPtr));
+}
+// =========================================================================
+
+// =========================================================================
+template <typename T>
+void DeviceVector<T>::reset(size_t const newSize)
+{
+  _deAllocate();
+  _allocate(newSize);
+}
+// =========================================================================
+
+// =========================================================================
+template <typename T>
+T DeviceVector<T>::operator[](size_t const &index)
+{
+  T hostValue;
+  GPU_Error_Check(cudaMemcpy(&hostValue, &(_ptr[index]), sizeof(T), cudaMemcpyDeviceToHost));
+  return hostValue;
+}
+// =========================================================================
+
+// =========================================================================
+template <typename T>
+T DeviceVector<T>::at(size_t const index)
+{
+  if (index < _size) {
+    // Use the overloaded [] operator to grab the value from GPU memory
+    // into host memory
+    return (*this)[index];
+  } else {
+    throw std::out_of_range(
+        "Warning: DeviceVector.at() detected an"
+        " out of bounds memory access. Tried to"
+        " access element " +
+        std::to_string(index) + " of " + std::to_string(_size));
+  }
+}
+// =========================================================================
+
+// =========================================================================
+template <typename T>
+void DeviceVector<T>::assign(T const &hostValue, size_t const &index)
+{
+  GPU_Error_Check(cudaMemcpy(&(_ptr[index]),  // destination
+                             &hostValue,      // source
+                             sizeof(T), cudaMemcpyHostToDevice));
+}
+// =========================================================================
+
+// =========================================================================
+template <typename T>
+void DeviceVector<T>::cpyHostToDevice(const T *arrIn, size_t const &arrSize)
+{
+  if (arrSize <= _size) {
+    GPU_Error_Check(cudaMemcpy(_ptr, arrIn, arrSize * sizeof(T), cudaMemcpyHostToDevice));
+  } else {
+    throw std::out_of_range(
+        "Warning: Couldn't copy array to device,"
+        " device array is too small. Host array"
+        " size=" +
+        std::to_string(arrSize) + ", device array size=" + std::to_string(arrSize));
+  }
+}
+// =========================================================================
+
+// =========================================================================
+template <typename T>
+void DeviceVector<T>::cpyDeviceToHost(T *arrOut, size_t const &arrSize)
+{
+  if (_size <= arrSize) {
+    GPU_Error_Check(cudaMemcpy(arrOut, _ptr, _size * sizeof(T), cudaMemcpyDeviceToHost));
+  } else {
+    throw std::out_of_range(
+        "Warning: Couldn't copy array to host, "
+        "host array is too small. Host array "
+        "size=" +
+        std::to_string(arrSize) + ", device array size=" + std::to_string(arrSize));
+  }
+}
+// =========================================================================
+}  // end namespace cuda_utilities
+   // =============================================================================
+   // End definition of DeviceVector class
+   // =============================================================================
\ No newline at end of file
diff --git a/src/utils/DeviceVector_tests.cu b/src/utils/DeviceVector_tests.cu
index 26a63dbca..6acd84308 100644
--- a/src/utils/DeviceVector_tests.cu
+++ b/src/utils/DeviceVector_tests.cu
@@ -1,195 +1,195 @@
 /*!
- * \file device_vector_tests.cu
+ * \file DeviceVector_tests.cu
  * \author Robert 'Bob' Caddy (rvc@pitt.edu)
  * \brief Tests for the DeviceVector class
  *
  */
 
 // STL Includes
-#include <vector>
-#include <string>
 #include <iostream>
 #include <numeric>
+#include <string>
+#include <vector>
 
 // External Includes
-#include <gtest/gtest.h>    // Include GoogleTest and related libraries/headers
+#include <gtest/gtest.h>  // Include GoogleTest and related libraries/headers
 
 // Local Includes
 #include "../global/global.h"
-#include "../utils/testing_utilities.h"
 #include "../utils/DeviceVector.h"
+#include "../utils/testing_utilities.h"
 
-
-namespace // Anonymous namespace
+namespace  // Anonymous namespace
 {
-    template <typename T>
-    void checkPointerAttributes(cuda_utilities::DeviceVector<T> &devVector)
-    {
-        // Get the pointer information
-        cudaPointerAttributes ptrAttributes;
-        CudaSafeCall(cudaPointerGetAttributes(&ptrAttributes, devVector.data()));
-
-        // Warning strings
-        std::string typeMessage          = "ptrAttributes.type should be 2 since "
-                                           "that indicates type cudaMemoryTypeDevice. "
-                                           "0 is cudaMemoryTypeUnregistered, "
-                                           "1 is cudaMemoryTypeHost, and "
-                                           "3 is cudaMemoryTypeManaged";
-        std::string const deviceMessage  = "The pointer should be on device 0";
-        std::string const devPtrMessage  = "The device pointer is nullptr";
-        std::string const hostPtrMessage = "The host pointer is not nullptr";
-
-        // Check that the pointer information is correct
-        #ifdef  O_HIP
-            typeMessage = "ptrAttributes.memoryType should be 1 since that indicates a HIP device pointer.";
-            EXPECT_EQ(1, ptrAttributes.memoryType)      << typeMessage;
-        #else // O_HIP is not defined i.e. we're using CUDA
-            EXPECT_EQ(2, ptrAttributes.type)            << typeMessage;
-        #endif  // O_HIP
-        EXPECT_EQ(0, ptrAttributes.device)              << deviceMessage;
-        EXPECT_NE(nullptr, ptrAttributes.devicePointer) << devPtrMessage;
-        EXPECT_EQ(nullptr, ptrAttributes.hostPointer)   << hostPtrMessage;
-    }
-} // Anonymous namespace
+template <typename T>
+void Check_Pointer_Attributes(cuda_utilities::DeviceVector<T> &devVector)
+{
+  // Get the pointer information
+  cudaPointerAttributes ptrAttributes;
+  GPU_Error_Check(cudaPointerGetAttributes(&ptrAttributes, devVector.data()));
+
+  // Warning strings
+  std::string typeMessage =
+      "ptrAttributes.type should be 2 since "
+      "that indicates type cudaMemoryTypeDevice. "
+      "0 is cudaMemoryTypeUnregistered, "
+      "1 is cudaMemoryTypeHost, and "
+      "3 is cudaMemoryTypeManaged";
+  std::string const deviceMessage  = "The pointer should be on device 0";
+  std::string const devPtrMessage  = "The device pointer is nullptr";
+  std::string const hostPtrMessage = "The host pointer is not nullptr";
+
+// Check that the pointer information is correct
+#ifdef O_HIP
+  typeMessage =
+      "ptrAttributes.memoryType should be 1 since that indicates a HIP device "
+      "pointer.";
+  EXPECT_EQ(1, ptrAttributes.memoryType) << typeMessage;
+#else   // O_HIP is not defined i.e. we're using CUDA
+  EXPECT_EQ(2, ptrAttributes.type) << typeMessage;
+#endif  // O_HIP
+  EXPECT_EQ(0, ptrAttributes.device) << deviceMessage;
+  EXPECT_NE(nullptr, ptrAttributes.devicePointer) << devPtrMessage;
+  EXPECT_EQ(nullptr, ptrAttributes.hostPointer) << hostPtrMessage;
+}
+}  // Anonymous namespace
 
 // =============================================================================
 // Tests for expected behavior
 // =============================================================================
-TEST(tALLDeviceVectorConstructor,
-     CheckConstructorDataAndSizeExpectProperAllocationAndValues)
+TEST(tALLDeviceVectorConstructor, CheckConstructorDataAndSizeExpectProperAllocationAndValues)
 {
-    // Initialize the DeviceVector
-    size_t const vectorSize = 10;
-    cuda_utilities::DeviceVector<double> devVector{vectorSize};
+  // Initialize the DeviceVector
+  size_t const vectorSize = 10;
+  cuda_utilities::DeviceVector<double> devVector{vectorSize};
 
-    // Check that the size is correct
-    EXPECT_EQ(vectorSize, devVector.size());
+  // Check that the size is correct
+  EXPECT_EQ(vectorSize, devVector.size());
 
-    // Check the pointer information
-    checkPointerAttributes<double>(devVector);
+  // Check the pointer information
+  Check_Pointer_Attributes<double>(devVector);
 }
 
-TEST(tALLDeviceVectorDestructor,
-     CheckDestructorExpectProperDeallocation)
+TEST(tALLDeviceVectorDestructor, CheckDestructorExpectProperDeallocation)
 {
-   // Initialize the DeviceVector
-   size_t const vectorSize = 10;
-   cuda_utilities::DeviceVector<double> devVector{vectorSize};
-
-    // Destruct the object
-    devVector.~DeviceVector();
-
-   // Get the pointer information
-   cudaPointerAttributes ptrAttributes;
-   CudaSafeCall(cudaPointerGetAttributes(&ptrAttributes, devVector.data()));
-
-    // Warning strings
-    std::string typeMessage          = "ptrAttributes.type should be 0 since "
-                                       "that indicates type cudaMemoryTypeUnregistered"
-                                       "0 is cudaMemoryTypeUnregistered, "
-                                       "1 is cudaMemoryTypeHost, "
-                                       "2 is cudaMemoryTypeDevice, and"
-                                       "3 is cudaMemoryTypeManaged";
-    std::string deviceMessage        = "The pointer should be null which is device -2";
-    std::string const devPtrMessage  = "The device pointer is nullptr";
-    std::string const hostPtrMessage = "The host pointer is not nullptr";
-
-    // Check that the pointer information is correct
-    #ifdef  O_HIP
-        typeMessage = "ptrAttributes.memoryType should be 1 since that indicates a HIP device pointer.";
-        deviceMessage = "The pointer should be 0";
-        EXPECT_EQ(0, ptrAttributes.memoryType)      << typeMessage;
-        EXPECT_EQ(0, ptrAttributes.device)          << deviceMessage;
-    #else // O_HIP is not defined i.e. we're using CUDA
-        EXPECT_EQ(0, ptrAttributes.type)            << typeMessage;
-        EXPECT_EQ(-2, ptrAttributes.device)         << deviceMessage;
-    #endif  // O_HIP
-    EXPECT_EQ(nullptr, ptrAttributes.devicePointer) << devPtrMessage;
-    EXPECT_EQ(nullptr, ptrAttributes.hostPointer)   << hostPtrMessage;
+  // Initialize the DeviceVector
+  size_t const vectorSize = 10;
+  cuda_utilities::DeviceVector<double> devVector{vectorSize};
+
+  // Destruct the object
+  devVector.~DeviceVector();
+
+  // Get the pointer information
+  cudaPointerAttributes ptrAttributes;
+  cudaPointerGetAttributes(&ptrAttributes, devVector.data());
+
+  // Warning strings
+  std::string typeMessage =
+      "ptrAttributes.type should be 0 since "
+      "that indicates type cudaMemoryTypeUnregistered"
+      "0 is cudaMemoryTypeUnregistered, "
+      "1 is cudaMemoryTypeHost, "
+      "2 is cudaMemoryTypeDevice, and"
+      "3 is cudaMemoryTypeManaged";
+  std::string deviceMessage        = "The pointer should be null which is device -2";
+  std::string const devPtrMessage  = "The device pointer is nullptr";
+  std::string const hostPtrMessage = "The host pointer is not nullptr";
+
+// Check that the pointer information is correct
+#ifdef O_HIP
+  typeMessage =
+      "ptrAttributes.memoryType should be 1 since that indicates a HIP device "
+      "pointer.";
+  deviceMessage = "The pointer should be 0";
+  EXPECT_EQ(0, ptrAttributes.memoryType) << typeMessage;
+  EXPECT_EQ(0, ptrAttributes.device) << deviceMessage;
+#else   // O_HIP is not defined i.e. we're using CUDA
+  EXPECT_EQ(0, ptrAttributes.type) << typeMessage;
+  EXPECT_EQ(-2, ptrAttributes.device) << deviceMessage;
+#endif  // O_HIP
+  EXPECT_EQ(nullptr, ptrAttributes.devicePointer) << devPtrMessage;
+  EXPECT_EQ(nullptr, ptrAttributes.hostPointer) << hostPtrMessage;
+
+  // Reconstruct DeviceVector object to avoid error
+  new (&devVector) cuda_utilities::DeviceVector<double>{vectorSize};
 }
 
 TEST(tALLDeviceVectorStdVectorHostToDeviceCopyAndIndexing,
      CheckDeviceMemoryValuesAndIndexingOperationsExpectCorrectMemoryValues)
 {
-    // Initialize the vectors
-    size_t const vectorSize = 10;
-    cuda_utilities::DeviceVector<double> devVector{vectorSize};
-    std::vector<double> stdVec(vectorSize);
-    std::iota(stdVec.begin(), stdVec.end(), 0);
-
-    // Copy the value to the device memory
-    devVector.cpyHostToDevice(stdVec);
-
-    // Check the values in device memory with both the .at() method and
-    // overloaded [] operator
-    for (size_t i = 0; i < vectorSize; i++)
-    {
-        EXPECT_EQ(stdVec.at(i), devVector.at(i));
-        EXPECT_EQ(stdVec.at(i), devVector[i]);
-    }
+  // Initialize the vectors
+  size_t const vectorSize = 10;
+  cuda_utilities::DeviceVector<double> devVector{vectorSize};
+  std::vector<double> stdVec(vectorSize);
+  std::iota(stdVec.begin(), stdVec.end(), 0);
+
+  // Copy the value to the device memory
+  devVector.cpyHostToDevice(stdVec);
+
+  // Check the values in device memory with both the .at() method and
+  // overloaded [] operator
+  for (size_t i = 0; i < vectorSize; i++) {
+    EXPECT_EQ(stdVec.at(i), devVector.at(i));
+    EXPECT_EQ(stdVec.at(i), devVector[i]);
+  }
 }
 
 TEST(tALLDeviceVectorArrayHostToDeviceCopyAndIndexing,
      CheckDeviceMemoryValuesAndIndexingOperationsExpectCorrectMemoryValues)
 {
-   // Initialize the vectors
-   size_t const vectorSize = 10;
-   cuda_utilities::DeviceVector<double> devVector{vectorSize};
-   std::vector<double> stdVec(vectorSize);
-   std::iota(stdVec.begin(), stdVec.end(), 0);
-
-   // Copy the value to the device memory
-   devVector.cpyHostToDevice(stdVec.data(), stdVec.size());
-
-   // Check the values in device memory with both the .at() method and
-   // overloaded [] operator
-   for (size_t i = 0; i < vectorSize; i++)
-   {
-       EXPECT_EQ(stdVec.at(i), devVector.at(i));
-       EXPECT_EQ(stdVec.at(i), devVector[i]);
-   }
+  // Initialize the vectors
+  size_t const vectorSize = 10;
+  cuda_utilities::DeviceVector<double> devVector{vectorSize};
+  std::vector<double> stdVec(vectorSize);
+  std::iota(stdVec.begin(), stdVec.end(), 0);
+
+  // Copy the value to the device memory
+  devVector.cpyHostToDevice(stdVec.data(), stdVec.size());
+
+  // Check the values in device memory with both the .at() method and
+  // overloaded [] operator
+  for (size_t i = 0; i < vectorSize; i++) {
+    EXPECT_EQ(stdVec.at(i), devVector.at(i));
+    EXPECT_EQ(stdVec.at(i), devVector[i]);
+  }
 }
 
-TEST(tALLDeviceVectorArrayAssignmentMethod,
-     AssignSingleValuesExpectCorrectMemoryValues)
+TEST(tALLDeviceVectorArrayAssignmentMethod, AssignSingleValuesExpectCorrectMemoryValues)
 {
-    // Initialize the vectors
-    size_t const vectorSize = 10;
-    cuda_utilities::DeviceVector<double> devVector{vectorSize};
+  // Initialize the vectors
+  size_t const vectorSize = 10;
+  cuda_utilities::DeviceVector<double> devVector{vectorSize};
 
-    // Perform assignment
-    devVector.assign(13);
-    devVector.assign(17,4);
+  // Perform assignment
+  devVector.assign(13);
+  devVector.assign(17, 4);
 
-    // Check the values in device memory
-    EXPECT_EQ(13, devVector.at(0));
-    EXPECT_EQ(17, devVector.at(4));
+  // Check the values in device memory
+  EXPECT_EQ(13, devVector.at(0));
+  EXPECT_EQ(17, devVector.at(4));
 }
 
-TEST(tALLDeviceVectorStdVectorDeviceToHostCopy,
-     CheckHostMemoryValuesExpectCorrectMemoryValues)
+TEST(tALLDeviceVectorStdVectorDeviceToHostCopy, CheckHostMemoryValuesExpectCorrectMemoryValues)
 {
-   // Initialize the vectors
-   size_t const vectorSize = 10;
-   cuda_utilities::DeviceVector<double> devVector{vectorSize};
-   std::vector<double> stdVec(vectorSize), hostVec(vectorSize);
-   std::iota(stdVec.begin(), stdVec.end(), 0);
-
-   // Copy the value to the device memory
-   devVector.cpyHostToDevice(stdVec);
-
-   // Copy the values to the host memory
-   devVector.cpyDeviceToHost(hostVec);
-
-   // Check the values
-   for (size_t i = 0; i < vectorSize; i++)
-   {
-       EXPECT_EQ(stdVec.at(i), hostVec.at(i));
-   }
+  // Initialize the vectors
+  size_t const vectorSize = 10;
+  cuda_utilities::DeviceVector<double> devVector{vectorSize};
+  std::vector<double> stdVec(vectorSize), hostVec(vectorSize);
+  std::iota(stdVec.begin(), stdVec.end(), 0);
+
+  // Copy the value to the device memory
+  devVector.cpyHostToDevice(stdVec);
+
+  // Copy the values to the host memory
+  devVector.cpyDeviceToHost(hostVec);
+
+  // Check the values
+  for (size_t i = 0; i < vectorSize; i++) {
+    EXPECT_EQ(stdVec.at(i), hostVec.at(i));
+  }
 }
 
-TEST(tALLDeviceVectorArrayDeviceToHostCopy,
-     CheckHostMemoryValuesExpectCorrectMemoryValues)
+TEST(tALLDeviceVectorArrayDeviceToHostCopy, CheckHostMemoryValuesExpectCorrectMemoryValues)
 {
   // Initialize the vectors
   size_t const vectorSize = 10;
@@ -204,145 +204,138 @@ TEST(tALLDeviceVectorArrayDeviceToHostCopy,
   devVector.cpyDeviceToHost(hostVec.data(), hostVec.size());
 
   // Check the values
-  for (size_t i = 0; i < vectorSize; i++)
-  {
-      EXPECT_EQ(stdVec.at(i), hostVec.at(i));
+  for (size_t i = 0; i < vectorSize; i++) {
+    EXPECT_EQ(stdVec.at(i), hostVec.at(i));
   }
 }
 
-TEST(tALLDeviceVectorReset,
-     SetNewSizeExpectCorrectSize)
+TEST(tALLDeviceVectorReset, SetNewSizeExpectCorrectSize)
 {
-    // Initialize the vectors
-    size_t const vectorSize = 10;
-    size_t const newSize    = 20;
-    cuda_utilities::DeviceVector<double> devVector{vectorSize};
-    std::vector<double> stdVec(vectorSize), newVec(newSize);
-    std::iota(stdVec.begin(), stdVec.end(), 0);
-    std::iota(newVec.begin(), newVec.end(), 20);
-
-    // Copy the value to the device memory
-    devVector.cpyHostToDevice(stdVec);
-
-    // Reset the vector
-    devVector.reset(newSize);
-
-    // Check the size
-    EXPECT_EQ(newSize, devVector.size());
-
-    // Check the pointer
-    checkPointerAttributes(devVector);
-
-    // Copy the new values into device memory
-    devVector.cpyHostToDevice(newVec);
-
-    // Check the values
-    for (size_t i = 0; i < newSize; i++)
-    {
-        EXPECT_EQ(newVec.at(i), devVector.at(i));
-    }
+  // Initialize the vectors
+  size_t const vectorSize = 10;
+  size_t const newSize    = 20;
+  cuda_utilities::DeviceVector<double> devVector{vectorSize};
+  std::vector<double> stdVec(vectorSize), newVec(newSize);
+  std::iota(stdVec.begin(), stdVec.end(), 0);
+  std::iota(newVec.begin(), newVec.end(), 20);
+
+  // Copy the value to the device memory
+  devVector.cpyHostToDevice(stdVec);
+
+  // Reset the vector
+  devVector.reset(newSize);
+
+  // Check the size
+  EXPECT_EQ(newSize, devVector.size());
+
+  // Check the pointer
+  Check_Pointer_Attributes(devVector);
+
+  // Copy the new values into device memory
+  devVector.cpyHostToDevice(newVec);
+
+  // Check the values
+  for (size_t i = 0; i < newSize; i++) {
+    EXPECT_EQ(newVec.at(i), devVector.at(i));
+  }
 }
 
-TEST(tALLDeviceVectorResize,
-     SetLargerSizeExpectCorrectSize)
+TEST(tALLDeviceVectorResize, SetLargerSizeExpectCorrectSize)
 {
-    // Initialize the vectors
-    size_t const originalSize = 10;
-    size_t const newSize    = 20;
-    cuda_utilities::DeviceVector<double> devVector{originalSize};
-    std::vector<double> stdVec(originalSize);
-    std::iota(stdVec.begin(), stdVec.end(), 0);
-
-    // Copy the value to the device memory
-    devVector.cpyHostToDevice(stdVec);
-
-    // Reset the vector
-    devVector.resize(newSize);
-
-    // Check the size
-    EXPECT_EQ(newSize, devVector.size());
-
-    // Check the pointer
-    checkPointerAttributes(devVector);
-
-    // Check the values
-    for (size_t i = 0; i < originalSize; i++)
-    {
-        double const fiducialValue = (i < stdVec.size())? stdVec.at(i): 0;
-        EXPECT_EQ(fiducialValue, devVector.at(i));
-    }
+  // Initialize the vectors
+  size_t const originalSize = 10;
+  size_t const newSize      = 20;
+  cuda_utilities::DeviceVector<double> devVector{originalSize};
+  std::vector<double> stdVec(originalSize);
+  std::iota(stdVec.begin(), stdVec.end(), 0);
+
+  // Copy the value to the device memory
+  devVector.cpyHostToDevice(stdVec);
+
+  // Reset the vector
+  devVector.resize(newSize);
+
+  // Check the size
+  EXPECT_EQ(newSize, devVector.size());
+
+  // Check the pointer
+  Check_Pointer_Attributes(devVector);
+
+  // Check the values
+  for (size_t i = 0; i < originalSize; i++) {
+    double const fiducialValue = (i < stdVec.size()) ? stdVec.at(i) : 0;
+    EXPECT_EQ(fiducialValue, devVector.at(i));
+  }
 }
 
-TEST(tALLDeviceVectorResize,
-     SetSmallerSizeExpectCorrectSize)
+TEST(tALLDeviceVectorResize, SetSmallerSizeExpectCorrectSize)
 {
-    // Initialize the vectors
-    size_t const vectorSize = 10;
-    size_t const newSize    = 5;
-    cuda_utilities::DeviceVector<double> devVector{vectorSize};
-    std::vector<double> stdVec(vectorSize);
-    std::iota(stdVec.begin(), stdVec.end(), 0);
-
-    // Copy the value to the device memory
-    devVector.cpyHostToDevice(stdVec);
-
-    // Reset the vector
-    devVector.resize(newSize);
-
-    // Check the size
-    EXPECT_EQ(newSize, devVector.size());
-
-    // Check the pointer
-    checkPointerAttributes(devVector);
-
-    // Check the values
-    for (size_t i = 0; i < newSize; i++)
-    {
-        EXPECT_EQ(stdVec.at(i), devVector.at(i));
-    }
+  // Initialize the vectors
+  size_t const vectorSize = 10;
+  size_t const newSize    = 5;
+  cuda_utilities::DeviceVector<double> devVector{vectorSize};
+  std::vector<double> stdVec(vectorSize);
+  std::iota(stdVec.begin(), stdVec.end(), 0);
+
+  // Copy the value to the device memory
+  devVector.cpyHostToDevice(stdVec);
+
+  // Reset the vector
+  devVector.resize(newSize);
+
+  // Check the size
+  EXPECT_EQ(newSize, devVector.size());
+
+  // Check the pointer
+  Check_Pointer_Attributes(devVector);
+
+  // Check the values
+  for (size_t i = 0; i < newSize; i++) {
+    EXPECT_EQ(stdVec.at(i), devVector.at(i));
+  }
 }
 
 // =============================================================================
 // Tests for exceptions
 // =============================================================================
-TEST(tALLDeviceVectorAt,
-     OutOfBoundsAccessExpectThrowOutOfRange)
+TEST(tALLDeviceVectorAt, OutOfBoundsAccessExpectThrowOutOfRange)
 {
-   // Initialize the vectors
-   size_t const vectorSize = 10;
-   cuda_utilities::DeviceVector<double> devVector{vectorSize};
-   std::vector<double> stdVec(vectorSize);
-   std::iota(stdVec.begin(), stdVec.end(), 0);
+  // Initialize the vectors
+  size_t const vectorSize = 10;
+  cuda_utilities::DeviceVector<double> devVector{vectorSize};
+  std::vector<double> stdVec(vectorSize);
+  std::iota(stdVec.begin(), stdVec.end(), 0);
 
-   // Copy the value to the device memory
-   devVector.cpyHostToDevice(stdVec);
+  // Copy the value to the device memory
+  devVector.cpyHostToDevice(stdVec);
 
-   // Check that the .at() method throws the correct exception
-   EXPECT_THROW(devVector.at(100), std::out_of_range);
+  // Check that the .at() method throws the correct exception
+  // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto)
+  EXPECT_THROW(devVector.at(100), std::out_of_range);
 }
 
-TEST(tALLDeviceVectorStdVectorHostToDeviceCopy,
-    OutOfBoundsCopyExpectThrowOutOfRange)
+TEST(tALLDeviceVectorStdVectorHostToDeviceCopy, OutOfBoundsCopyExpectThrowOutOfRange)
 {
-    // Initialize the vectors
-    size_t const vectorSize = 10;
-    cuda_utilities::DeviceVector<double> devVector{vectorSize};
-    std::vector<double> stdVec(2*vectorSize);
-    std::iota(stdVec.begin(), stdVec.end(), 0);
-
-    // Copy the value to the device memory
-    EXPECT_THROW(devVector.cpyHostToDevice(stdVec), std::out_of_range);
+  // Initialize the vectors
+  size_t const vectorSize = 10;
+  cuda_utilities::DeviceVector<double> devVector{vectorSize};
+  std::vector<double> stdVec(2 * vectorSize);
+  std::iota(stdVec.begin(), stdVec.end(), 0);
+
+  // Copy the value to the device memory
+  // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto)
+  EXPECT_THROW(devVector.cpyHostToDevice(stdVec), std::out_of_range);
 }
 
-TEST(tALLDeviceVectorStdVectorDeviceToHostCopy,
-    OutOfBoundsCopyExpectThrowOutOfRange)
+TEST(tALLDeviceVectorStdVectorDeviceToHostCopy, OutOfBoundsCopyExpectThrowOutOfRange)
 {
-    // Initialize the vectors
-    size_t const vectorSize = 10;
-    cuda_utilities::DeviceVector<double> devVector{vectorSize};
-    std::vector<double> stdVec(vectorSize/2);
-    std::iota(stdVec.begin(), stdVec.end(), 0);
-
-    // Copy the value to the device memory
-    EXPECT_THROW(devVector.cpyDeviceToHost(stdVec), std::out_of_range);
+  // Initialize the vectors
+  size_t const vectorSize = 10;
+  cuda_utilities::DeviceVector<double> devVector{vectorSize};
+  std::vector<double> stdVec(vectorSize / 2);
+  std::iota(stdVec.begin(), stdVec.end(), 0);
+
+  // Copy the value to the device memory
+  // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto)
+  EXPECT_THROW(devVector.cpyDeviceToHost(stdVec), std::out_of_range);
 }
diff --git a/src/utils/cuda_utilities.cpp b/src/utils/cuda_utilities.cpp
index a924b3f76..142266159 100644
--- a/src/utils/cuda_utilities.cpp
+++ b/src/utils/cuda_utilities.cpp
@@ -1,5 +1,37 @@
+/*!
+ * \file cuda_utilities.cpp
+ * \brief Implementation file for cuda_utilities.h
+ *
+ */
 #include "../utils/cuda_utilities.h"
 
-namespace cuda_utilities {
+#include <iomanip>
+#include <sstream>
 
-} // end namespace cuda_utilities
+#include "../io/io.h"
+#include "../mpi/mpi_routines.h"
+
+namespace cuda_utilities
+{
+void Print_GPU_Memory_Usage(std::string const &additional_text)
+{
+  // Get the memory usage
+  size_t gpu_free_memory, gpu_total_memory;
+  GPU_Error_Check(cudaMemGetInfo(&gpu_free_memory, &gpu_total_memory));
+
+  // Assuming that all GPUs in the system have the same amount of memory
+  size_t const gpu_used_memory = Reduce_size_t_Max(gpu_total_memory - gpu_free_memory);
+
+  Real const percent_used = 100.0 * (static_cast<Real>(gpu_used_memory) / static_cast<Real>(gpu_total_memory));
+
+  // Prep the message to print
+  std::stringstream output_message_stream;
+  output_message_stream << std::fixed << std::setprecision(2);
+  output_message_stream << "Percentage of GPU memory used: " << percent_used << "%. GPU memory used "
+                        << std::to_string(gpu_used_memory) << ", GPU total memory " << std::to_string(gpu_total_memory)
+                        << additional_text << std::endl;
+  std::string output_message = output_message_stream.str();
+
+  chprintf(output_message.c_str());
+}
+}  // end namespace cuda_utilities
diff --git a/src/utils/cuda_utilities.h b/src/utils/cuda_utilities.h
index 9c07a95a6..85927d532 100644
--- a/src/utils/cuda_utilities.h
+++ b/src/utils/cuda_utilities.h
@@ -7,93 +7,131 @@
 
 #pragma once
 
+#include <string>
+
 // Local Includes
 #include "../global/global.h"
 #include "../global/global_cuda.h"
 #include "../utils/gpu.hpp"
 
-
 namespace cuda_utilities
 {
-    /*!
-     * \brief Compute the x, y, and z indices based off of the 1D index
-     *
-     * \param[in] id The 1D index
-     * \param[in] nx The total number of cells in the x direction
-     * \param[in] ny The total number of cells in the y direction
-     * \param[out] xid The x index
-     * \param[out] yid The y index
-     * \param[out] zid The z index
-     */
-    inline __host__ __device__ void compute3DIndices(int const &id,
-                                                     int const &nx,
-                                                     int const &ny,
-                                                     int &xid,
-                                                     int &yid,
-                                                     int &zid)
-    {
-        zid = id / (nx * ny);
-        yid = (id - zid * nx * ny) / nx;
-        xid = id - zid * nx * ny - yid * nx;
-    }
+/*!
+ * \brief Compute the x, y, and z indices based off of the 1D index
+ *
+ * \param[in] id The 1D index
+ * \param[in] nx The total number of cells in the x direction
+ * \param[in] ny The total number of cells in the y direction
+ * \param[out] xid The x index
+ * \param[out] yid The y index
+ * \param[out] zid The z index
+ */
+inline __host__ __device__ void compute3DIndices(int const &id, int const &nx, int const &ny, int &xid, int &yid,
+                                                 int &zid)
+{
+  zid = id / (nx * ny);
+  yid = (id - zid * nx * ny) / nx;
+  xid = id - zid * nx * ny - yid * nx;
+}
 
-    /*!
-     * \brief Compute the 1D index based off of the 3D indices
-     *
-     * \param xid The x index
-     * \param yid The y index
-     * \param zid The z index
-     * \param nx The total number of cells in the x direction
-     * \param ny The total number of cells in the y direction
-     * \return int The 1D index
-     */
-    inline __host__ __device__ int compute1DIndex(int const &xid,
-                                                  int const &yid,
-                                                  int const &zid,
-                                                  int const &nx,
-                                                  int const &ny)
-    {
-        return xid + yid*nx + zid*nx*ny;
-    }
+/*!
+ * \brief Compute the 1D index based off of the 3D indices
+ *
+ * \param xid The x index
+ * \param yid The y index
+ * \param zid The z index
+ * \param nx The total number of cells in the x direction
+ * \param ny The total number of cells in the y direction
+ * \return int The 1D index
+ */
+inline __host__ __device__ int compute1DIndex(int const &xid, int const &yid, int const &zid, int const &nx,
+                                              int const &ny)
+{
+  return xid + yid * nx + zid * nx * ny;
+}
 
-    inline __host__ __device__ void Get_Real_Indices(int const &n_ghost, int const &nx, int const &ny, int const &nz, int &is, int &ie, int &js, int &je, int &ks, int &ke) {
-        is = n_ghost;
-        ie = nx - n_ghost;
-        if (ny == 1) {
-            js = 0;
-            je = 1;
-        } else {
-            js = n_ghost;
-            je = ny - n_ghost;
-        }
-        if (nz == 1) {
-            ks = 0;
-            ke = 1;
-        } else {
-            ks = n_ghost;
-            ke = nz - n_ghost;
-        }
-    }
+inline __host__ __device__ void Get_Real_Indices(int const &n_ghost, int const &nx, int const &ny, int const &nz,
+                                                 int &is, int &ie, int &js, int &je, int &ks, int &ke)
+{
+  is = n_ghost;
+  ie = nx - n_ghost;
+  if (ny == 1) {
+    js = 0;
+    je = 1;
+  } else {
+    js = n_ghost;
+    je = ny - n_ghost;
+  }
+  if (nz == 1) {
+    ks = 0;
+    ke = 1;
+  } else {
+    ks = n_ghost;
+    ke = nz - n_ghost;
+  }
+}
 
-    // =========================================================================
-    /*!
-    * \brief Set the value that `pointer` points at in GPU memory to `value`.
-    * This only sets the first value in memory so if `pointer` points to an
-    * array then only `pointer[0]` will be set; i.e. this effectively does
-    * `pointer = &value`
-    *
-    * \tparam T Any scalar type
-    * \param[in] pointer The location in GPU memory
-    * \param[in] value The value to set `*pointer` to
-    */
-    template <typename T>
-    void setScalarDeviceMemory(T *pointer, T const value)
-    {
-        CudaSafeCall(
-            cudaMemcpy(pointer,  // destination
-                       &value,   // source
-                       sizeof(T),
-                       cudaMemcpyHostToDevice));
+/*!
+ * \brief Initialize GPU memory
+ *
+ * \param[in] ptr The pointer to GPU memory
+ * \param[in] N The size of the array in bytes
+ */
+inline void initGpuMemory(Real *ptr, size_t N) { GPU_Error_Check(cudaMemset(ptr, 0, N)); }
+
+// =====================================================================
+/*!
+ * \brief Struct to determine the optimal number of blocks and threads
+ * per block to use when launching a kernel. The member
+ * variables are `threadsPerBlock` and `numBlocks` which are chosen with
+ * the occupancy API.
+ *
+ */
+template <typename T>
+struct AutomaticLaunchParams {
+ public:
+  /*!
+   * \brief Construct a new AutomaticLaunchParams object. By default it
+   * generates values of numBlocks and threadsPerBlock suitable for a
+   * kernel with a grid-stride loop. For a kernel with one thread per
+   * element set the optional `numElements` argument to the number of
+   * elements
+   *
+   * \param[in] kernel The kernel to determine the launch parameters for
+   * \param[in] numElements The number of elements in the array that
+   the kernel operates on
+   */
+  AutomaticLaunchParams(T &kernel, size_t numElements = 0)
+  {
+    cudaOccupancyMaxPotentialBlockSize(&numBlocks, &threadsPerBlock, kernel, 0, 0);
+
+    if (numElements > 0) {
+      // This line is needed to check that threadsPerBlock isn't zero. Somewhere inside
+      // cudaOccupancyMaxPotentialBlockSize threadsPerBlock can be zero according to clang-tidy so this line sets it to
+      // a more reasonable value
+      threadsPerBlock = (threadsPerBlock == 0) ? TPB : threadsPerBlock;
+
+      // Compute the number of blocks
+      numBlocks = (numElements + threadsPerBlock - 1) / threadsPerBlock;
     }
-    // =========================================================================
-}
\ No newline at end of file
+  }
+
+  /// Defaulted Destructor
+  ~AutomaticLaunchParams() = default;
+
+  /// The maximum number of threads per block that the device supports
+  int threadsPerBlock;
+  /// The maximum number of scheduleable blocks on the device
+  int numBlocks;
+};
+// =====================================================================
+
+// =====================================================================
+/*!
+ * \brief Print the current GPU memory usage to standard out
+ *
+ * \param additional_text Any additional text to be appended to the end of the message
+ */
+void Print_GPU_Memory_Usage(std::string const &additional_text = "");
+// =====================================================================
+}  // end namespace cuda_utilities
\ No newline at end of file
diff --git a/src/utils/cuda_utilities_tests.cpp b/src/utils/cuda_utilities_tests.cpp
index ddefebfd7..ab35d28d2 100644
--- a/src/utils/cuda_utilities_tests.cpp
+++ b/src/utils/cuda_utilities_tests.cpp
@@ -1,23 +1,24 @@
 
 /*!
  * \file cuda_utilities_tests.cpp
- * \author Robert 'Bob' Caddy (rvc@pitt.edu), Helena Richie (helenarichie@pitt.edu)
- * \brief Tests for the contents of cuda_utilities.h and cuda_utilities.cpp
+ * \author Robert 'Bob' Caddy (rvc@pitt.edu), Helena Richie
+ * (helenarichie@pitt.edu) \brief Tests for the contents of cuda_utilities.h and
+ * cuda_utilities.cpp
  *
  */
 
 // STL Includes
-#include <vector>
-#include <string>
 #include <iostream>
+#include <string>
+#include <vector>
 
 // External Includes
-#include <gtest/gtest.h>    // Include GoogleTest and related libraries/headers
+#include <gtest/gtest.h>  // Include GoogleTest and related libraries/headers
 
 // Local Includes
-#include "../utils/testing_utilities.h"
-#include "../utils/cuda_utilities.h"
 #include "../global/global.h"
+#include "../utils/cuda_utilities.h"
+#include "../utils/testing_utilities.h"
 
 /*
  PCM : n_ghost = 2
@@ -31,112 +32,88 @@
 // Local helper functions
 namespace
 {
-    struct TestParams
-    {
-        std::vector<int> n_ghost {2, 2, 3, 4};
-        std::vector<int> nx {100, 2048, 2048, 2048};
-        std::vector<int> ny {1, 2048, 2048, 2048};
-        std::vector<int> nz {1, 4096, 4096, 4096};
-        std::vector<std::string> names {"Single-cell 3D PCM/PLMP case", "Large 3D PCM/PLMP case", "Large PLMC case", "Large PPMP/PPMC case"};
-
-    };
-}
-
-TEST(tHYDROCudaUtilsGetRealIndices, CorrectInputExpectCorrectOutput) {
-    TestParams parameters;
-    std::vector<std::vector<int>> fiducial_indices {{2, 98, 0, 1, 0, 1},
-                                               {2, 2046, 2, 2046, 2, 4094},
-                                               {3, 2045, 3, 2045, 3, 4093},
-                                               {4, 2044, 4, 2044, 4, 4092}};
-
-    for (size_t i = 0; i < parameters.names.size(); i++)
-    {
-        int is;
-        int ie;
-        int js;
-        int je;
-        int ks;
-        int ke;
-        cuda_utilities::Get_Real_Indices(parameters.n_ghost.at(i), parameters.nx.at(i), parameters.ny.at(i), parameters.nz.at(i), is, ie, js, je, ks, ke);
-
-        std::vector<std::string> index_names {"is", "ie", "js", "je", "ks", "ke"};
-        std::vector<int> test_indices {is, ie, js, je, ks, ke};
-
-        for (size_t j = 0; j < test_indices.size(); j++)
-        {
-            testingUtilities::checkResults(fiducial_indices[i][j], test_indices[j], index_names[j] + " " + parameters.names[i]);
-        }
+struct TestParams {
+  std::vector<int> n_ghost{2, 2, 3, 4};
+  std::vector<int> nx{100, 2048, 2048, 2048};
+  std::vector<int> ny{1, 2048, 2048, 2048};
+  std::vector<int> nz{1, 4096, 4096, 4096};
+  std::vector<std::string> names{"Single-cell 3D PCM/PLMP case", "Large 3D PCM/PLMP case", "Large PLMC case",
+                                 "Large PPMP/PPMC case"};
+};
+}  // namespace
+
+TEST(tHYDROCudaUtilsGetRealIndices, CorrectInputExpectCorrectOutput)
+{
+  TestParams parameters;
+  std::vector<std::vector<int>> fiducial_indices{
+      {2, 98, 0, 1, 0, 1}, {2, 2046, 2, 2046, 2, 4094}, {3, 2045, 3, 2045, 3, 4093}, {4, 2044, 4, 2044, 4, 4092}};
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    int is;
+    int ie;
+    int js;
+    int je;
+    int ks;
+    int ke;
+    cuda_utilities::Get_Real_Indices(parameters.n_ghost.at(i), parameters.nx.at(i), parameters.ny.at(i),
+                                     parameters.nz.at(i), is, ie, js, je, ks, ke);
+
+    std::vector<std::string> index_names{"is", "ie", "js", "je", "ks", "ke"};
+    std::vector<int> test_indices{is, ie, js, je, ks, ke};
+
+    for (size_t j = 0; j < test_indices.size(); j++) {
+      testing_utilities::Check_Results(fiducial_indices[i][j], test_indices[j],
+                                       index_names[j] + " " + parameters.names[i]);
     }
+  }
 }
 
 // =============================================================================
-TEST(tALLCompute3DIndices,
-     CorrectInputExpectCorrectOutput)
+TEST(tALLCompute3DIndices, CorrectInputExpectCorrectOutput)
 {
-    // Parameters
-    int const id = 723;
-    int const nx = 34;
-    int const ny = 14;
-
-    // Fiducial Data
-    int const fiducialXid = 9;
-    int const fiducialYid = 7;
-    int const fiducialZid = 1;
-
-    // Test Variables
-    int testXid;
-    int testYid;
-    int testZid;
-
-    // Get test data
-    cuda_utilities::compute3DIndices(id, nx, ny, testXid, testYid, testZid);
-
-    EXPECT_EQ(fiducialXid, testXid);
-    EXPECT_EQ(fiducialYid, testYid);
-    EXPECT_EQ(fiducialZid, testZid);
+  // Parameters
+  int const id = 723;
+  int const nx = 34;
+  int const ny = 14;
+
+  // Fiducial Data
+  int const fiducialXid = 9;
+  int const fiducialYid = 7;
+  int const fiducialZid = 1;
+
+  // Test Variables
+  int testXid;
+  int testYid;
+  int testZid;
+
+  // Get test data
+  cuda_utilities::compute3DIndices(id, nx, ny, testXid, testYid, testZid);
+
+  EXPECT_EQ(fiducialXid, testXid);
+  EXPECT_EQ(fiducialYid, testYid);
+  EXPECT_EQ(fiducialZid, testZid);
 }
 // =============================================================================
 
 // =============================================================================
-TEST(tALLCompute1DIndex,
-     CorrectInputExpectCorrectOutput)
+TEST(tALLCompute1DIndex, CorrectInputExpectCorrectOutput)
 {
-    // Parameters
-    int const xid = 72;
-    int const yid = 53;
-    int const zid = 14;
-    int const nx  = 128;
-    int const ny  = 64;
-
-    // Fiducial Data
-    int const fiducialId = 121544;
-
-    // Test Variable
-    int testId;
+  // Parameters
+  int const xid = 72;
+  int const yid = 53;
+  int const zid = 14;
+  int const nx  = 128;
+  int const ny  = 64;
 
-    // Get test data
-    testId = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny);
-
-    EXPECT_EQ(fiducialId, testId);
-}
-// =============================================================================
-
-// =============================================================================
-TEST(tALLSetScalarDeviceMemory,
-     TypeDoubleInputExpectCorrectValueSet)
-{
-    double value = 173.246;
-    double *dev_ptr, host_val;
-    CudaSafeCall(cudaMalloc(&dev_ptr, sizeof(double)));
+  // Fiducial Data
+  int const fiducialId = 121544;
 
-    cuda_utilities::setScalarDeviceMemory<double>(dev_ptr, value);
+  // Test Variable
+  int testId;
 
-    CudaSafeCall(
-        cudaMemcpy(&host_val,  // destination
-                    dev_ptr,    // source
-                    sizeof(double),
-                    cudaMemcpyDeviceToHost));
+  // Get test data
+  testId = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny);
 
-    EXPECT_EQ(value, host_val);
+  EXPECT_EQ(fiducialId, testId);
 }
 // =============================================================================
diff --git a/src/utils/debug_utilities.cu b/src/utils/debug_utilities.cu
new file mode 100644
index 000000000..20720583f
--- /dev/null
+++ b/src/utils/debug_utilities.cu
@@ -0,0 +1,60 @@
+#include <math.h>
+
+#include "../global/global.h"
+#include "../global/global_cuda.h"
+#include "../io/io.h"                 // provides chprintf
+#include "../utils/error_handling.h"  // provides chexit
+
+__global__ void Dump_Values_Kernel(Real* device_array, int array_size, int marker)
+{
+  int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  if (tid >= array_size) {
+    return;
+  }
+  kernel_printf("Dump Values: marker %d tid %d value %g \n", marker, tid, device_array[tid]);
+}
+
+/*
+  Prints out all values of a device_array
+ */
+void Dump_Values(Real* device_array, int array_size, int marker)
+{
+  int ngrid = (array_size + TPB - 1) / TPB;
+  dim3 dim1dGrid(ngrid, 1, 1);
+  dim3 dim1dBlock(TPB, 1, 1);
+  hipLaunchKernelGGL(Dump_Values_Kernel, dim1dGrid, dim1dBlock, 0, 0, device_array, array_size, marker);
+}
+
+__global__ void Check_For_Nan_Kernel(Real* device_array, int array_size, int check_num, bool* out_bool)
+{
+  int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  if (tid >= array_size) {
+    return;
+  }
+  if (device_array[tid] == device_array[tid]) {
+    return;
+  }
+  out_bool[0] = true;
+  kernel_printf("Check_For_Nan_Kernel found Nan Checknum: %d Thread: %d\n", check_num, tid);
+}
+
+/*
+  Checks a device_array for NaN and prints/exits if found
+ */
+void Check_For_Nan(Real* device_array, int array_size, int check_num)
+{
+  bool host_out_bool[1] = {false};
+  bool* out_bool;
+  GPU_Error_Check(cudaMalloc((void**)&out_bool, sizeof(bool)));
+  cudaMemcpy(out_bool, host_out_bool, sizeof(bool), cudaMemcpyHostToDevice);
+  int ngrid = (array_size + TPB - 1) / TPB;
+  dim3 dim1dGrid(ngrid, 1, 1);
+  dim3 dim1dBlock(TPB, 1, 1);
+  hipLaunchKernelGGL(Check_For_Nan_Kernel, dim1dGrid, dim1dBlock, 0, 0, device_array, array_size, check_num, out_bool);
+  cudaMemcpy(host_out_bool, out_bool, sizeof(bool), cudaMemcpyDeviceToHost);
+  cudaFree(out_bool);
+
+  if (host_out_bool[0]) {
+    chexit(-1);
+  }
+}
diff --git a/src/utils/error_check_cuda.cu b/src/utils/error_check_cuda.cu
index 32aa2274e..153106b10 100644
--- a/src/utils/error_check_cuda.cu
+++ b/src/utils/error_check_cuda.cu
@@ -1,30 +1,30 @@
 /*! \file error_check_cuda.cu
  *  \brief Error Check Cuda */
 
-#ifdef CUDA
-
+#include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <math.h>
-#include "../utils/gpu.hpp"
+
 #include "../global/global.h"
 #include "../global/global_cuda.h"
 #include "../io/io.h"
 #include "../utils/error_check_cuda.h"
+#include "../utils/gpu.hpp"
 
-
-__global__ void Check_Value_Along_Axis( Real *dev_array, int n_field, int nx, int ny, int nz, int n_ghost, int *return_value){
-
+__global__ void Check_Value_Along_Axis(Real *dev_array, int n_field, int nx, int ny, int nz, int n_ghost,
+                                       int *return_value)
+{
   int tid_j = blockIdx.x * blockDim.x + threadIdx.x;
   int tid_k = blockIdx.y * blockDim.y + threadIdx.y;
 
-
-  if ( blockDim.x != N_Y || blockDim.y != N_Z ){
-    if ( tid_j == 0 && tid_k == 0 ) printf("ERROR CHECK: Block Dimension Error \n" );
+  if (blockDim.x != N_Y || blockDim.y != N_Z) {
+    if (tid_j == 0 && tid_k == 0) {
+      printf("ERROR CHECK: Block Dimension Error \n");
+    }
     return;
   }
 
-  __shared__ Real sh_data[N_Z*N_Y];
+  __shared__ Real sh_data[N_Z * N_Y];
   //
   int n_cells, indx_x, indx_3d, indx_2d;
   Real field_value;
@@ -35,45 +35,38 @@ __global__ void Check_Value_Along_Axis( Real *dev_array, int n_field, int nx, in
   int error = 0;
 
   indx_x = 0;
-  for ( indx_x=0; indx_x<nx; indx_x++){
-
-    indx_3d = indx_x + tid_j * nx + tid_k * nx * ny;
-    indx_2d = tid_j + tid_k * ny;
-    field_value = dev_array[ n_field * n_cells + indx_3d ];
+  for (indx_x = 0; indx_x < nx; indx_x++) {
+    indx_3d          = indx_x + tid_j * nx + tid_k * nx * ny;
+    indx_2d          = tid_j + tid_k * ny;
+    field_value      = dev_array[n_field * n_cells + indx_3d];
     sh_data[indx_2d] = field_value;
 
     __syncthreads();
 
-    if ( tid_j == 0 && tid_k == 0 ){
-      for( i=0; i<N_Y*N_Z-1; i++ ){
-        if ( sh_data[i] == sh_data[i+1] ) error += 1;
+    if (tid_j == 0 && tid_k == 0) {
+      for (i = 0; i < N_Y * N_Z - 1; i++) {
+        if (sh_data[i] == sh_data[i + 1]) {
+          error += 1;
+        }
       }
     }
-
   }
 
-  if ( tid_j == 0 && tid_k == 0 ) *return_value = error;
-
-
+  if (tid_j == 0 && tid_k == 0) {
+    *return_value = error;
+  }
 }
 
-int Check_Field_Along_Axis( Real *dev_array, int n_field, int nx, int ny, int nz, int n_ghost, dim3 Grid_Error, dim3 Block_Error ){
-
+int Check_Field_Along_Axis(Real *dev_array, int n_field, int nx, int ny, int nz, int n_ghost, dim3 Grid_Error,
+                           dim3 Block_Error)
+{
   int *error_value_dev;
-  CudaSafeCall( cudaMalloc((void**)&error_value_dev,   sizeof(int)) );
-  hipLaunchKernelGGL(Check_Value_Along_Axis, Grid_Error, Block_Error, 0, 0,  dev_conserved, 0, nx, ny, nz, n_ghost, error_value_dev );
+  GPU_Error_Check(cudaMalloc((void **)&error_value_dev, sizeof(int)));
+  hipLaunchKernelGGL(Check_Value_Along_Axis, Grid_Error, Block_Error, 0, 0, dev_conserved, 0, nx, ny, nz, n_ghost,
+                     error_value_dev);
 
   int error_value_host;
-  CudaSafeCall( cudaMemcpy( &error_value_host, error_value_dev, sizeof(int), cudaMemcpyDeviceToHost) );
+  GPU_Error_Check(cudaMemcpy(&error_value_host, error_value_dev, sizeof(int), cudaMemcpyDeviceToHost));
 
   return error_value_host;
-
-
 }
-
-
-
-
-
-
-#endif
diff --git a/src/utils/error_check_cuda.h b/src/utils/error_check_cuda.h
index 3ff7cd58c..98bf9b391 100644
--- a/src/utils/error_check_cuda.h
+++ b/src/utils/error_check_cuda.h
@@ -1,23 +1,18 @@
 /*! \file error_check_cuda.h
  *  \brief error_check_cuda.h */
 
-#ifdef CUDA
-
 #ifndef ERROR_CHECK_CUDA_H
 #define ERROR_CHECK_CUDA_H
 
 #include "../global/global.h"
 
-
 #define N_Z 24
 #define N_Y 24
 
+int Check_Field_Along_Axis(Real *dev_array, int n_field, int nx, int ny, int nz, int n_ghost, dim3 Grid_Error,
+                           dim3 Block_Error);
 
-int Check_Field_Along_Axis( Real *dev_array, int n_field, int nx, int ny, int nz, int n_ghost, dim3 Grid_Error, dim3 Block_Error );
-
-__global__ void Check_Value_Along_Axis( Real *dev_array, int n_field, int nx, int ny, int nz, int n_ghost, int *return_value);
-
-
+__global__ void Check_Value_Along_Axis(Real *dev_array, int n_field, int nx, int ny, int nz, int n_ghost,
+                                       int *return_value);
 
-#endif //ERROR_CHECK_CUDA_H
-#endif //CUDA
+#endif  // ERROR_CHECK_CUDA_H
diff --git a/src/utils/error_handling.cpp b/src/utils/error_handling.cpp
index 6dba70000..60246cfbe 100644
--- a/src/utils/error_handling.cpp
+++ b/src/utils/error_handling.cpp
@@ -1,27 +1,184 @@
 #include "../utils/error_handling.h"
-#ifdef   MPI_CHOLLA
-#include <mpi.h>
-void chexit(int code)
-{
 
-  if(code==0)
-  {
+#include <cassert>
+#include <cstdarg>
+#include <cstdio>
+#include <iostream>
+#include <string>
+
+#ifdef MPI_CHOLLA
+  #include "../mpi/mpi_routines.h"
+[[noreturn]] void chexit(int code)
+{
+  if (code == 0) {
     /*exit normally*/
     MPI_Finalize();
     exit(code);
 
-  }else{
-
+  } else {
     /*exit with non-zero error code*/
-    MPI_Abort(MPI_COMM_WORLD,code);
+    MPI_Abort(MPI_COMM_WORLD, code);
     exit(code);
-
   }
 }
 #else  /*MPI_CHOLLA*/
-void chexit(int code)
+[[noreturn]] void chexit(int code)
 {
   /*exit using code*/
   exit(code);
 }
 #endif /*MPI_CHOLLA*/
+
+void Check_Configuration(Parameters const& P)
+{
+// General Checks
+// ==============
+#ifndef GIT_HASH
+  #error "GIT_HASH is not defined"
+#endif  //! GIT_HASH
+
+  // Check that GIT_HASH is the correct length. It needs to be 41 and not 40 since strings are null terminated
+  static_assert(sizeof(GIT_HASH) == 41);
+
+#ifndef MACRO_FLAGS
+  #error "MACRO_FLAGS is not defined"
+#endif  //! MACRO_FLAGS
+
+  // Check that MACRO_FLAGS has contents
+  static_assert(sizeof(MACRO_FLAGS) > 1);
+
+// Can only have one integrator enabled
+#if ((defined(VL) + defined(CTU) + defined(SIMPLE)) != 1)
+  #error "Only one integrator can be enabled at a time."
+#endif  // Only one integrator check
+
+  // Check the boundary conditions
+  auto Check_Boundary = [](int const& boundary, std::string const& direction) {
+    bool is_allowed_bc = boundary >= 0 and boundary <= 4;
+    CHOLLA_ASSERT(is_allowed_bc,
+                  "WARNING: Possibly invalid boundary conditions for direction: %s flag: %d. Must "
+                  "select between 0 (no boundary), 1 (periodic), 2 (reflective), 3 (transmissive), "
+                  "4 (custom), 5 (mpi).",
+                  direction.c_str(), boundary);
+  };
+  Check_Boundary(P.xl_bcnd, "xl_bcnd");
+  Check_Boundary(P.xu_bcnd, "xu_bcnd");
+  Check_Boundary(P.yl_bcnd, "yl_bcnd");
+  Check_Boundary(P.yu_bcnd, "yu_bcnd");
+  Check_Boundary(P.zl_bcnd, "zl_bcnd");
+  Check_Boundary(P.zu_bcnd, "zu_bcnd");
+
+  // warn if error checking is disabled
+#ifndef DISABLE_GPU_ERROR_CHECKING
+  // NOLINTNEXTLINE(clang-diagnostic-#warnings)
+  #warning "CUDA error checking is disabled. Enable it by compiling without the DISABLE_GPU_ERROR_CHECKING macro."
+#endif  //! DISABLE_GPU_ERROR_CHECKING
+
+  // Check that PRECISION is 2
+#ifndef PRECISION
+  #error "The PRECISION macro is required"
+#endif  //! PRECISION
+  static_assert(PRECISION == 2, "PRECISION must be 2. Single precision is not currently supported");
+
+// MHD Checks
+// ==========
+#ifdef MHD
+  assert(P.nx > 1 and P.ny > 1 and P.nz > 1 and "MHD runs must be 3D");
+
+  // Must use the correct integrator
+  #if !defined(VL) || defined(SIMPLE) || defined(CTU)
+    #error "MHD only supports the Van Leer integrator"
+  #endif  //! VL or SIMPLE
+
+  // must only use HLLD
+  #if !defined(HLLD) || defined(EXACT) || defined(ROE) || defined(HLL) || defined(HLLC)
+    #error "MHD only supports the HLLD Riemann Solver"
+  #endif  //! HLLD or EXACT or ROE or HLL or HLLC
+
+  // May only use certain reconstructions
+  #if ((defined(PCM) + defined(PLMC) + defined(PPMC)) != 1) || defined(PLMP) || defined(PPMP)
+    #error "MHD only supports PCM, PLMC, and PPMC reconstruction"
+  #endif  // Reconstruction check
+
+  // must have HDF5
+  #if defined(OUTPUT) and (not defined(HDF5))
+    #error "MHD only supports HDF5 output"
+  #endif  //! HDF5
+
+  // Warn that diode boundaries are disabled
+  if (P.xl_bcnd == 3 or P.xu_bcnd == 3 or P.yl_bcnd == 3 or P.yu_bcnd == 3 or P.zl_bcnd == 3 or P.zu_bcnd == 3) {
+    std::cerr << "Warning: The diode on the outflow boundaries is disabled for MHD" << std::endl;
+  }
+
+  // Error if unsupported boundary condition is used
+  assert(P.xl_bcnd != 2 or P.xu_bcnd != 2 or P.yl_bcnd != 2 or P.yu_bcnd != 2 or P.zl_bcnd != 2 or
+         P.zu_bcnd != 2 && "MHD does not support reflective boundary conditions");
+
+  // AVERAGE_SLOW_CELLS not supported on MHD
+  #ifdef AVERAGE_SLOW_CELLS
+    #error "MHD does not support AVERAGE_SLOW_CELLS"
+  #endif  // AVERAGE_SLOW_CELLS
+
+#endif  // MHD
+}
+
+// NOLINTNEXTLINE(cert-dcl50-cpp)
+[[noreturn]] void Abort_With_Err_(const char* func_name, const char* file_name, int line_num, const char* msg, ...)
+{
+  // considerations when using MPI:
+  //  - all processes must execute this function to catch errors that happen on
+  //    just one process
+  //  - to handle cases where all processes encounter the same error, we
+  //    pre-buffer the error message (so that the output remains legible)
+
+  // since we are aborting, it's OK that this isn't the most optimized
+
+  // prepare some info for the error message header
+  const char* sanitized_func_name = (func_name == nullptr) ? "{unspecified}" : func_name;
+
+#ifdef MPI_CHOLLA
+  std::string proc_info = std::to_string(procID) + " / " + std::to_string(nproc) + " (using MPI)";
+#else
+  std::string proc_info = "0 / 1 (NOT using MPI)";
+#endif
+
+  // prepare the formatted message
+  std::string msg_buf;
+  if (msg == nullptr) {
+    msg_buf = "{nullptr encountered instead of error message}";
+  } else {
+    std::va_list args, args_copy;
+    va_start(args, msg);
+    va_copy(args_copy, args);
+
+    // The clang-analyzer-valist.Uninitialized is bugged and triggers improperly on this line
+    // NOLINTNEXTLINE(clang-analyzer-valist.Uninitialized)
+    std::size_t bufsize_without_terminator = std::vsnprintf(nullptr, 0, msg, args);
+    va_end(args);
+
+    // NOTE: starting in C++17 it's possible to mutate msg_buf by mutating msg_buf.data()
+
+    // we initialize a msg_buf with size == bufsize_without_terminator (filled with ' ' chars)
+    // - msg_buf.data() returns a ptr with msg_buf.size() + 1 characters. We are allowed to
+    //   mutate any of the first msg_buf.size() characters. The entry at
+    //   msg_buf.data()[msg_buf.size()] is initially  '\0' (& it MUST remain equal to '\0')
+    // - the 2nd argument of std::vsnprintf is the size of the output buffer. We NEED to
+    //   include the terminator character in this argument, otherwise the formatted message
+    //   will be truncated
+    msg_buf = std::string(bufsize_without_terminator, ' ');
+    std::vsnprintf(msg_buf.data(), bufsize_without_terminator + 1, msg, args_copy);
+    va_end(args_copy);
+  }
+
+  // now write the error and exit
+  std::fprintf(stderr,
+               "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"
+               "Error occurred in %s on line %d\n"
+               "Function: %s\n"
+               "Rank: %s\n"
+               "Message: %s\n"
+               "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n",
+               file_name, line_num, sanitized_func_name, proc_info.data(), msg_buf.data());
+  std::fflush(stderr);  // may be unnecessary for stderr
+  chexit(1);
+}
\ No newline at end of file
diff --git a/src/utils/error_handling.h b/src/utils/error_handling.h
index 174c083b6..8ce08e2ca 100644
--- a/src/utils/error_handling.h
+++ b/src/utils/error_handling.h
@@ -1,5 +1,71 @@
 #ifndef ERROR_HANDLING_CHOLLA_H
 #define ERROR_HANDLING_CHOLLA_H
 #include <stdlib.h>
-void chexit(int code);
+
+#include "../global/global.h"
+[[noreturn]] void chexit(int code);
+
+/*!
+ * \brief Check that the Cholla configuration and parameters don't have any significant errors. Mostly compile time
+ * checks.
+ *
+ */
+void Check_Configuration(Parameters const& P);
+
+/*!
+ * \brief helper function that prints an error message & aborts the program (in
+ * an MPI-safe way). Commonly invoked through a macro.
+ *
+ */
+[[noreturn]] void Abort_With_Err_(const char* func_name, const char* file_name, int line_num, const char* msg, ...);
+
+/* __CHOLLA_PRETTY_FUNC__ is a magic constant like __LINE__ or __FILE__ that
+ * provides the name of the current function.
+ * - The C++11 standard requires that __func__ is provided on all platforms, but
+ *   that only provides limited information (just the name of the function).
+ * - Where available, we prefer to use compiler-specific features that provide
+ *   more information about the function (like the scope of the function & the
+ *   the function signature).
+ */
+#ifdef __GNUG__
+  #define __CHOLLA_PRETTY_FUNC__ __PRETTY_FUNCTION__
+#else
+  #define __CHOLLA_PRETTY_FUNC__ __func__
+#endif
+
+/*!
+ * \brief print an error-message (with printf formatting) & abort the program.
+ *
+ * This macro should be treated as a function with the signature:
+ *   [[noreturn]] void CHOLLA_ERROR(const char* msg, ...);
+ *
+ * - The 1st arg is printf-style format argument specifying the error message
+ * - The remaining args arguments are used to format error message
+ *
+ * \note
+ * the ``msg`` string is part of the variadic args so that there is always
+ * at least 1 variadic argument (even in cases when ``msg`` doesn't format
+ * any arguments). There is no way around this until C++ 20.
+ */
+#define CHOLLA_ERROR(...) Abort_With_Err_(__CHOLLA_PRETTY_FUNC__, __FILE__, __LINE__, __VA_ARGS__)
+
+/*!
+ * \brief if the condition is false, print an error-message (with printf
+ * formatting) & abort the program.
+ *
+ * This macro should be treated as a function with the signature:
+ *   [[noreturn]] void CHOLLA_ASSERT(bool cond, const char* msg, ...);
+ *
+ * - The 1st arg is a boolean condition. When true, this does noth
+ * - The 2nd arg is printf-style format argument specifying the error message
+ * - The remaining args arguments are used to format error message
+ *
+ * \note
+ * the behavior is independent of the ``NDEBUG`` macro
+ */
+#define CHOLLA_ASSERT(cond, ...)                                              \
+  if (not(cond)) {                                                            \
+    Abort_With_Err_(__CHOLLA_PRETTY_FUNC__, __FILE__, __LINE__, __VA_ARGS__); \
+  }
+
 #endif /*ERROR_HANDLING_CHOLLA_H*/
diff --git a/src/utils/gpu.hpp b/src/utils/gpu.hpp
index f68172768..0817940cc 100644
--- a/src/utils/gpu.hpp
+++ b/src/utils/gpu.hpp
@@ -3,437 +3,470 @@
 #include <cassert>
 #include <cstdio>
 #include <cstdlib>
+#include <experimental/source_location>
+#include <iostream>
+
+#include "../utils/error_handling.h"
 
 #ifdef O_HIP
 
-#include <hip/hip_runtime.h>
+  #include <hip/hip_runtime.h>
+
+  #if defined(PARIS) || defined(PARIS_GALACTIC)
+
+    #include <hipfft.h>
+
+  #endif  // CUFFT PARIS PARIS_GALACTIC
+
+  #define WARPSIZE 64
+static constexpr int maxWarpsPerBlock = 1024 / WARPSIZE;
+
+  #define CUFFT_D2Z     HIPFFT_D2Z
+  #define CUFFT_FORWARD HIPFFT_FORWARD
+  #define CUFFT_INVERSE HIPFFT_BACKWARD
+  #define CUFFT_Z2D     HIPFFT_Z2D
+  #define CUFFT_Z2Z     HIPFFT_Z2Z
+  #define CUFFT_SUCCESS HIPFFT_SUCCESS
+  #define cufftResult_t hipfftResult_t
+
+  #define cudaDeviceSynchronize              hipDeviceSynchronize
+  #define cudaError                          hipError_t
+  #define cudaError_t                        hipError_t
+  #define cudaErrorInsufficientDriver        hipErrorInsufficientDriver
+  #define cudaErrorNoDevice                  hipErrorNoDevice
+  #define cudaEvent_t                        hipEvent_t
+  #define cudaEventCreate                    hipEventCreate
+  #define cudaEventElapsedTime               hipEventElapsedTime
+  #define cudaEventRecord                    hipEventRecord
+  #define cudaEventSynchronize               hipEventSynchronize
+  #define cudaFree                           hipFree
+  #define cudaFreeHost                       hipHostFree
+  #define cudaGetDevice                      hipGetDevice
+  #define cudaGetDeviceCount                 hipGetDeviceCount
+  #define cudaGetErrorString                 hipGetErrorString
+  #define cudaGetLastError                   hipGetLastError
+  #define cudaHostAlloc                      hipHostMalloc
+  #define cudaHostAllocDefault               hipHostMallocDefault
+  #define cudaMalloc                         hipMalloc
+  #define cudaMemcpy                         hipMemcpy
+  #define cudaMemcpyAsync                    hipMemcpyAsync
+  #define cudaMemcpyPeer                     hipMemcpyPeer
+  #define cudaMemcpyDeviceToHost             hipMemcpyDeviceToHost
+  #define cudaMemcpyDeviceToDevice           hipMemcpyDeviceToDevice
+  #define cudaMemcpyHostToDevice             hipMemcpyHostToDevice
+  #define cudaMemGetInfo                     hipMemGetInfo
+  #define cudaMemset                         hipMemset
+  #define cudaReadModeElementType            hipReadModeElementType
+  #define cudaSetDevice                      hipSetDevice
+  #define cudaSuccess                        hipSuccess
+  #define cudaDeviceProp                     hipDeviceProp_t
+  #define cudaGetDeviceProperties            hipGetDeviceProperties
+  #define cudaPointerAttributes              hipPointerAttribute_t
+  #define cudaPointerGetAttributes           hipPointerGetAttributes
+  #define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize
+  #define cudaMemGetInfo                     hipMemGetInfo
+  #define cudaDeviceGetPCIBusId              hipDeviceGetPCIBusId
+  #define cudaPeekAtLastError                hipPeekAtLastError
+
+  // Texture definitions
+  #define cudaArray           hipArray
+  #define cudaMallocArray     hipMallocArray
+  #define cudaFreeArray       hipFreeArray
+  #define cudaMemcpyToArray   hipMemcpyToArray
+  #define cudaMemcpy2DToArray hipMemcpy2DToArray
+
+  #define cudaTextureObject_t      hipTextureObject_t
+  #define cudaCreateTextureObject  hipCreateTextureObject
+  #define cudaDestroyTextureObject hipDestroyTextureObject
+
+  #define cudaChannelFormatDesc      hipChannelFormatDesc
+  #define cudaCreateChannelDesc      hipCreateChannelDesc
+  #define cudaChannelFormatKindFloat hipChannelFormatKindFloat
+
+  #define cudaResourceDesc      hipResourceDesc
+  #define cudaResourceTypeArray hipResourceTypeArray
+  #define cudaTextureDesc       hipTextureDesc
+  #define cudaAddressModeClamp  hipAddressModeClamp
+  #define cudaFilterModeLinear  hipFilterModeLinear
+  #define cudaFilterModePoint   hipFilterModePoint
+  // Texture Definitions
+  #define cudaPointerAttributes    hipPointerAttribute_t
+  #define cudaPointerGetAttributes hipPointerGetAttributes
+
+  // FFT definitions
+  #define cufftDestroy       hipfftDestroy
+  #define cufftDoubleComplex hipfftDoubleComplex
+  #define cufftDoubleReal    hipfftDoubleReal
+  #define cufftExecD2Z       hipfftExecD2Z
+  #define cufftExecZ2D       hipfftExecZ2D
+  #define cufftExecZ2Z       hipfftExecZ2Z
+  #define cufftHandle        hipfftHandle
+  #define cufftPlan3d        hipfftPlan3d
+  #define cufftPlanMany      hipfftPlanMany
+
+  #define curandStateMRG32k3a_t hiprandStateMRG32k3a_t
+  #define curand_init           hiprand_init
+  #define curand                hiprand
+  #define curand_poisson        hiprand_poisson
 
-#if defined(PARIS) || defined(PARIS_GALACTIC)
+#else  // not O_HIP
 
-#include <hipfft.h>
+  #include <cuda_runtime.h>
 
-static void __attribute__((unused)) check(const hipfftResult err, const char *const file, const int line)
-{
-  if (err == HIPFFT_SUCCESS) return;
-  fprintf(stderr,"HIPFFT ERROR AT LINE %d OF FILE '%s': %d\n",line,file,err);
-  fflush(stderr);
-  exit(err);
-}
+  #if defined(PARIS) || defined(PARIS_GALACTIC)
 
-#endif  // PARIS PARIC_GALACTIC
-
-#define WARPSIZE 64
-static constexpr int maxWarpsPerBlock = 1024/WARPSIZE;
-
-#define CUFFT_D2Z HIPFFT_D2Z
-#define CUFFT_FORWARD HIPFFT_FORWARD
-#define CUFFT_INVERSE HIPFFT_BACKWARD
-#define CUFFT_Z2D HIPFFT_Z2D
-#define CUFFT_Z2Z HIPFFT_Z2Z
-
-#define cudaDeviceSynchronize hipDeviceSynchronize
-#define cudaError hipError_t
-#define cudaError_t hipError_t
-#define cudaErrorInsufficientDriver hipErrorInsufficientDriver
-#define cudaErrorNoDevice hipErrorNoDevice
-#define cudaEvent_t hipEvent_t
-#define cudaEventCreate hipEventCreate
-#define cudaEventElapsedTime hipEventElapsedTime
-#define cudaEventRecord hipEventRecord
-#define cudaEventSynchronize hipEventSynchronize
-#define cudaFree hipFree
-#define cudaFreeHost hipHostFree
-#define cudaGetDevice hipGetDevice
-#define cudaGetDeviceCount hipGetDeviceCount
-#define cudaGetErrorString hipGetErrorString
-#define cudaGetLastError hipGetLastError
-#define cudaHostAlloc hipHostMalloc
-#define cudaHostAllocDefault hipHostMallocDefault
-#define cudaMalloc hipMalloc
-#define cudaMemcpy hipMemcpy
-#define cudaMemcpyAsync hipMemcpyAsync
-#define cudaMemcpyPeer hipMemcpyPeer
-#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
-#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice
-#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
-#define cudaMemGetInfo hipMemGetInfo
-#define cudaMemset hipMemset
-#define cudaReadModeElementType hipReadModeElementType
-#define cudaSetDevice hipSetDevice
-#define cudaSuccess hipSuccess
-#define cudaDeviceProp hipDeviceProp_t
-#define cudaGetDeviceProperties hipGetDeviceProperties
-#define cudaPointerAttributes hipPointerAttribute_t
-#define cudaPointerGetAttributes hipPointerGetAttributes
-
-// Texture definitions
-#define cudaArray hipArray
-#define cudaMallocArray hipMallocArray 
-#define cudaFreeArray hipFreeArray
-#define cudaMemcpyToArray hipMemcpyToArray
-#define cudaMemcpy2DToArray hipMemcpy2DToArray
-
-
-#define cudaTextureObject_t hipTextureObject_t
-#define cudaCreateTextureObject hipCreateTextureObject
-#define cudaDestroyTextureObject hipDestroyTextureObject
-
-#define cudaChannelFormatDesc hipChannelFormatDesc
-#define cudaCreateChannelDesc hipCreateChannelDesc
-#define cudaChannelFormatKindFloat hipChannelFormatKindFloat
-
-#define cudaResourceDesc hipResourceDesc
-#define cudaResourceTypeArray hipResourceTypeArray
-#define cudaTextureDesc hipTextureDesc
-#define cudaAddressModeClamp hipAddressModeClamp
-#define cudaFilterModeLinear hipFilterModeLinear
-#define cudaFilterModePoint hipFilterModePoint 
-// Texture Definitions
-
-// FFT definitions
-#define cufftDestroy hipfftDestroy
-#define cufftDoubleComplex hipfftDoubleComplex
-#define cufftDoubleReal hipfftDoubleReal
-#define cufftExecD2Z hipfftExecD2Z
-#define cufftExecZ2D hipfftExecZ2D
-#define cufftExecZ2Z hipfftExecZ2Z
-#define cufftHandle hipfftHandle
-#define cufftPlan3d hipfftPlan3d
-#define cufftPlanMany hipfftPlanMany
-
-static void __attribute__((unused)) check(const hipError_t err, const char *const file, const int line)
-{
-  if (err == hipSuccess) return;
-  fprintf(stderr,"HIP ERROR AT LINE %d OF FILE '%s': %s %s\n",line,file,hipGetErrorName(err),hipGetErrorString(err));
-  fflush(stderr);
-  exit(err);
-}
+    #include <cufft.h>
 
-#else  // not O_HIP
+  #endif  // defined(PARIS) || defined(PARIS_GALACTIC)
 
-#include <cuda_runtime.h>
+  #define WARPSIZE                               32
+static constexpr int maxWarpsPerBlock = 1024 / WARPSIZE;
+  #define hipLaunchKernelGGL(F, G, B, M, S, ...) F<<<G, B, M, S>>>(__VA_ARGS__)
+  #define __shfl_down(...)                       __shfl_down_sync(0xFFFFFFFF, __VA_ARGS__)
 
-#if defined(PARIS) || defined(PARIS_GALACTIC)
+#endif  // O_HIP
 
-#include <cufft.h>
+#define GPU_MAX_THREADS 256
 
-static void check(const cufftResult err, const char *const file, const int line)
+/*!
+ * \brief Check for CUDA/HIP error codes. Can be called wrapping a GPU function that returns a value or with no
+ * arguments and it will get the latest error code.
+ *
+ * \param[in] code The code to check. Defaults to the last error code
+ * \param[in] abort Whether or not to abort if an error is encountered. Defaults to True
+ * \param[in] location The location of the call. This should be left as the default value.
+ */
+inline void GPU_Error_Check(cudaError_t code = cudaPeekAtLastError(), bool abort = true,
+                            std::experimental::source_location location = std::experimental::source_location::current())
 {
-  if (err == CUFFT_SUCCESS) return;
-  fprintf(stderr,"CUFFT ERROR AT LINE %d OF FILE '%s': %d\n",line,file,err);
-  fflush(stderr);
-  exit(err);
+#ifndef DISABLE_GPU_ERROR_CHECKING
+  code = cudaDeviceSynchronize();
+
+  // Check the code
+  if (code != cudaSuccess) {
+    std::cout << "GPU_Error_Check: Failed at "
+              << "Line: " << location.line() << ", File: " << location.file_name()
+              << ", Function: " << location.function_name() << ", with code: " << cudaGetErrorString(code) << std::endl;
+    if (abort) {
+      chexit(code);
+    }
+  }
+#endif  // DISABLE_GPU_ERROR_CHECKING
 }
 
-#endif // defined(PARIS) || defined(PARIS_GALACTIC)
-
-static void check(const cudaError_t err, const char *const file, const int line)
+#if defined(PARIS) || defined(PARIS_GALACTIC)
+/*!
+ * \brief Check for CUFFT/HIPFFT error codes. Can be called wrapping a FFT function that returns a value
+ *
+ * \param[in] code The code to check
+ * \param[in] abort Whether or not to abort if an error is encountered. Defaults to True
+ * \param[in] location The location of the call. This should be left as the default value.
+ */
+inline void GPU_Error_Check(cufftResult_t code, bool abort = true,
+                            std::experimental::source_location location = std::experimental::source_location::current())
 {
-  if (err == cudaSuccess) return;
-  fprintf(stderr,"CUDA ERROR AT LINE %d OF FILE '%s': %s %s\n",line,file,cudaGetErrorName(err),cudaGetErrorString(err));
-  fflush(stderr);
-  exit(err);
+  #ifndef DISABLE_GPU_ERROR_CHECKING
+  // Check the code
+  if (code != CUFFT_SUCCESS) {
+    std::cout << "GPU_Error_Check: Failed at "
+              << "Line: " << location.line() << ", File: " << location.file_name()
+              << ", Function: " << location.function_name() << ", with FFT code: " << code << std::endl;
+    if (abort) {
+      chexit(code);
+    }
+  }
+  #endif  // DISABLE_GPU_ERROR_CHECKING
 }
-
-#define WARPSIZE 32
-static constexpr int maxWarpsPerBlock = 1024/WARPSIZE;
-#define hipLaunchKernelGGL(F,G,B,M,S,...) F<<<G,B,M,S>>>(__VA_ARGS__)
-#define __shfl_down(...) __shfl_down_sync(0xFFFFFFFF, __VA_ARGS__)
-
-#endif  //O_HIP
-
-#define CHECK(X) check(X,__FILE__,__LINE__)
-
-#define GPU_MAX_THREADS 256
+#endif  // defined(PARIS) || defined(PARIS_GALACTIC)
 
 #if defined(__CUDACC__) || defined(__HIPCC__)
 
 template <typename F>
-__global__ __launch_bounds__(GPU_MAX_THREADS)
-void gpuRun0(const int n0, const F f)
+__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun0(const int n0, const F f)
 {
-  const int i0 = blockIdx.x*blockDim.x+threadIdx.x;
-  if (i0 < n0) f(i0);
+  const int i0 = blockIdx.x * blockDim.x + threadIdx.x;
+  if (i0 < n0) {
+    f(i0);
+  }
 }
 
 template <typename F>
 void gpuFor(const int n0, const F f)
 {
-  if (n0 <= 0) return;
-  const int b0 = (n0+GPU_MAX_THREADS-1)/GPU_MAX_THREADS;
-  const int t0 = (n0+b0-1)/b0;
-  gpuRun0<<<b0,t0>>>(n0,f);
-  CHECK(cudaGetLastError());
+  if (n0 <= 0) {
+    return;
+  }
+  const int b0 = (n0 + GPU_MAX_THREADS - 1) / GPU_MAX_THREADS;
+  const int t0 = (n0 + b0 - 1) / b0;
+  gpuRun0<<<b0, t0>>>(n0, f);
+  GPU_Error_Check();
 }
 
 template <typename F>
-__global__ __launch_bounds__(GPU_MAX_THREADS)
-void gpuRun0x2(const F f)
+__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun0x2(const F f)
 {
   const int i0 = threadIdx.y;
   const int i1 = threadIdx.x;
-  f(i0,i1);
+  f(i0, i1);
 }
 
 template <typename F>
-__global__ __launch_bounds__(GPU_MAX_THREADS)
-void gpuRun1x1(const F f)
+__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun1x1(const F f)
 {
   const int i0 = blockIdx.x;
   const int i1 = threadIdx.x;
-  f(i0,i1);
+  f(i0, i1);
 }
 
 template <typename F>
-__global__ __launch_bounds__(GPU_MAX_THREADS)
-void gpuRun2x0(const int n1, const F f)
+__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun2x0(const int n1, const F f)
 {
   const int i0 = blockIdx.y;
-  const int i1 = blockIdx.x*blockDim.x+threadIdx.x;
-  if (i1 < n1) f(i0,i1);
+  const int i1 = blockIdx.x * blockDim.x + threadIdx.x;
+  if (i1 < n1) {
+    f(i0, i1);
+  }
 }
 
 template <typename F>
 void gpuFor(const int n0, const int n1, const F f)
 {
-  if ((n0 <= 0) || (n1 <= 0)) return;
-  const long nl01 = long(n0)*long(n1);
+  if ((n0 <= 0) || (n1 <= 0)) {
+    return;
+  }
+  const long nl01 = long(n0) * long(n1);
   assert(nl01 < long(INT_MAX));
 
   if (n1 > GPU_MAX_THREADS) {
-    const int b1 = (n1+GPU_MAX_THREADS-1)/GPU_MAX_THREADS;
-    const int t1 = (n1+b1-1)/b1;
-    gpuRun2x0<<<dim3(b1,n0),dim3(t1)>>>(n1,f);
-    CHECK(cudaGetLastError());
+    const int b1 = (n1 + GPU_MAX_THREADS - 1) / GPU_MAX_THREADS;
+    const int t1 = (n1 + b1 - 1) / b1;
+    gpuRun2x0<<<dim3(b1, n0), dim3(t1)>>>(n1, f);
+    GPU_Error_Check();
   } else if (nl01 > GPU_MAX_THREADS) {
-    gpuRun1x1<<<n0,n1>>>(f);
-    CHECK(cudaGetLastError());
+    gpuRun1x1<<<n0, n1>>>(f);
+    GPU_Error_Check();
   } else {
-    gpuRun0x2<<<1,dim3(n1,n0)>>>(f);
-    CHECK(cudaGetLastError());
+    gpuRun0x2<<<1, dim3(n1, n0)>>>(f);
+    GPU_Error_Check();
   }
 }
 
 template <typename F>
-__global__ __launch_bounds__(GPU_MAX_THREADS)
-void gpuRun0x3(const F f)
+__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun0x3(const F f)
 {
   const int i0 = threadIdx.z;
   const int i1 = threadIdx.y;
   const int i2 = threadIdx.x;
-  f(i0,i1,i2);
+  f(i0, i1, i2);
 }
 
 template <typename F>
-__global__ __launch_bounds__(GPU_MAX_THREADS)
-void gpuRun1x2(const F f)
+__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun1x2(const F f)
 {
   const int i0 = blockIdx.x;
   const int i1 = threadIdx.y;
   const int i2 = threadIdx.x;
-  f(i0,i1,i2);
+  f(i0, i1, i2);
 }
 
 template <typename F>
-__global__ __launch_bounds__(GPU_MAX_THREADS)
-void gpuRun2x1(const F f)
+__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun2x1(const F f)
 {
   const int i0 = blockIdx.y;
   const int i1 = blockIdx.x;
   const int i2 = threadIdx.x;
-  f(i0,i1,i2);
+  f(i0, i1, i2);
 }
 
 template <typename F>
-__global__ __launch_bounds__(GPU_MAX_THREADS)
-void gpuRun3x0(const int n2, const F f)
+__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun3x0(const int n2, const F f)
 {
   const int i0 = blockIdx.z;
   const int i1 = blockIdx.y;
-  const int i2 = blockIdx.x*blockDim.x+threadIdx.x;
-  if (i2 < n2) f(i0,i1,i2);
+  const int i2 = blockIdx.x * blockDim.x + threadIdx.x;
+  if (i2 < n2) {
+    f(i0, i1, i2);
+  }
 }
 
 template <typename F>
 void gpuFor(const int n0, const int n1, const int n2, const F f)
 {
-  if ((n0 <= 0) || (n1 <= 0) || (n2 <= 0)) return;
-  const long nl12 = long(n1)*long(n2);
-  const long nl012 = long(n0)*nl12;
+  if ((n0 <= 0) || (n1 <= 0) || (n2 <= 0)) {
+    return;
+  }
+  const long nl12  = long(n1) * long(n2);
+  const long nl012 = long(n0) * nl12;
   assert(nl012 < long(INT_MAX));
 
   if (n2 > GPU_MAX_THREADS) {
-    const int b2 = (n2+GPU_MAX_THREADS-1)/GPU_MAX_THREADS;
-    const int t2 = (n2+b2-1)/b2;
-    gpuRun3x0<<<dim3(b2,n1,n0),t2>>>(n2,f);
-    CHECK(cudaGetLastError());
+    const int b2 = (n2 + GPU_MAX_THREADS - 1) / GPU_MAX_THREADS;
+    const int t2 = (n2 + b2 - 1) / b2;
+    gpuRun3x0<<<dim3(b2, n1, n0), t2>>>(n2, f);
+    GPU_Error_Check();
   } else if (nl12 > GPU_MAX_THREADS) {
-    gpuRun2x1<<<dim3(n1,n0),n2>>>(f);
-    CHECK(cudaGetLastError());
+    gpuRun2x1<<<dim3(n1, n0), n2>>>(f);
+    GPU_Error_Check();
   } else if (nl012 > GPU_MAX_THREADS) {
-    gpuRun1x2<<<n0,dim3(n2,n1)>>>(f);
-    CHECK(cudaGetLastError());
+    gpuRun1x2<<<n0, dim3(n2, n1)>>>(f);
+    GPU_Error_Check();
   } else {
-    gpuRun0x3<<<1,dim3(n2,n1,n0)>>>(f);
-    CHECK(cudaGetLastError());
+    gpuRun0x3<<<1, dim3(n2, n1, n0)>>>(f);
+    GPU_Error_Check();
   }
 }
 
 template <typename F>
-__global__ __launch_bounds__(GPU_MAX_THREADS)
-void gpuRun1x3(const F f)
+__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun1x3(const F f)
 {
   const int i0 = blockIdx.x;
   const int i1 = threadIdx.z;
   const int i2 = threadIdx.y;
   const int i3 = threadIdx.x;
-  f(i0,i1,i2,i3);
+  f(i0, i1, i2, i3);
 }
 
 template <typename F>
-__global__ __launch_bounds__(GPU_MAX_THREADS)
-void gpuRun2x2(const F f)
+__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun2x2(const F f)
 {
   const int i0 = blockIdx.y;
   const int i1 = blockIdx.x;
   const int i2 = threadIdx.y;
   const int i3 = threadIdx.x;
-  f(i0,i1,i2,i3);
+  f(i0, i1, i2, i3);
 }
 
 template <typename F>
-__global__ __launch_bounds__(GPU_MAX_THREADS)
-void gpuRun3x1(const F f)
+__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun3x1(const F f)
 {
   const int i0 = blockIdx.z;
   const int i1 = blockIdx.y;
   const int i2 = blockIdx.x;
   const int i3 = threadIdx.x;
-  f(i0,i1,i2,i3);
+  f(i0, i1, i2, i3);
 }
 
 template <typename F>
-__global__ __launch_bounds__(GPU_MAX_THREADS)
-void gpuRun4x0(const int n23, const int n3, const F f)
+__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun4x0(const int n23, const int n3, const F f)
 {
-  const int i23 = blockIdx.x*blockDim.x+threadIdx.x;
+  const int i23 = blockIdx.x * blockDim.x + threadIdx.x;
   if (i23 < n23) {
     const int i0 = blockIdx.z;
     const int i1 = blockIdx.y;
-    const int i2 = i23/n3;
-    const int i3 = i23%n3;
-    f(i0,i1,i2,i3);
+    const int i2 = i23 / n3;
+    const int i3 = i23 % n3;
+    f(i0, i1, i2, i3);
   }
 }
 
 template <typename F>
 void gpuFor(const int n0, const int n1, const int n2, const int n3, const F f)
 {
-  if ((n0 <= 0) || (n1 <= 0) || (n2 <= 0) || (n3 <= 0)) return;
-  const long nl23 = long(n2)*long(n3);
-  const long nl123 = long(n1)*nl23;
-  assert(long(n0)*nl123 < long(INT_MAX));
+  if ((n0 <= 0) || (n1 <= 0) || (n2 <= 0) || (n3 <= 0)) {
+    return;
+  }
+  const long n23_long  = long(n2) * long(n3);
+  const long n123_long = long(n1) * n23_long;
+  assert(long(n0) * n123_long < long(INT_MAX));
 
-  const int n23 = int(nl23);
-  const int n123 = int(nl123);
+  const int n23  = int(n23_long);
+  const int n123 = int(n123_long);
   if (n3 > GPU_MAX_THREADS) {
-    const int b23 = (n23+GPU_MAX_THREADS-1)/GPU_MAX_THREADS;
-    const int t23 = (n23+b23-1)/b23;
-    gpuRun4x0<<<dim3(b23,n1,n0),t23>>>(n23,n3,f);
-    CHECK(cudaGetLastError());
+    const int b23 = (n23 + GPU_MAX_THREADS - 1) / GPU_MAX_THREADS;
+    const int t23 = (n23 + b23 - 1) / b23;
+    gpuRun4x0<<<dim3(b23, n1, n0), t23>>>(n23, n3, f);
+    GPU_Error_Check();
   } else if (n23 > GPU_MAX_THREADS) {
-    gpuRun3x1<<<dim3(n2,n1,n0),n3>>>(f);
-    CHECK(cudaGetLastError());
+    gpuRun3x1<<<dim3(n2, n1, n0), n3>>>(f);
+    GPU_Error_Check();
   } else if (n123 > GPU_MAX_THREADS) {
-    gpuRun2x2<<<dim3(n1,n0),dim3(n3,n2)>>>(f);
-    CHECK(cudaGetLastError());
+    gpuRun2x2<<<dim3(n1, n0), dim3(n3, n2)>>>(f);
+    GPU_Error_Check();
   } else {
-    gpuRun1x3<<<n0,dim3(n3,n2,n1)>>>(f);
-    CHECK(cudaGetLastError());
+    gpuRun1x3<<<n0, dim3(n3, n2, n1)>>>(f);
+    GPU_Error_Check();
   }
 }
 
 template <typename F>
-__global__ __launch_bounds__(GPU_MAX_THREADS)
-void gpuRun2x3(const F f)
+__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun2x3(const F f)
 {
   const int i0 = blockIdx.y;
   const int i1 = blockIdx.x;
   const int i2 = threadIdx.z;
   const int i3 = threadIdx.y;
   const int i4 = threadIdx.x;
-  f(i0,i1,i2,i3,i4);
+  f(i0, i1, i2, i3, i4);
 }
 
 template <typename F>
-__global__ __launch_bounds__(GPU_MAX_THREADS)
-void gpuRun3x2(const F f)
+__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun3x2(const F f)
 {
   const int i0 = blockIdx.z;
   const int i1 = blockIdx.y;
   const int i2 = blockIdx.x;
   const int i3 = threadIdx.y;
   const int i4 = threadIdx.x;
-  f(i0,i1,i2,i3,i4);
+  f(i0, i1, i2, i3, i4);
 }
 
 template <typename F>
-__global__ __launch_bounds__(GPU_MAX_THREADS)
-void gpuRun4x1(const int n1, const F f)
+__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun4x1(const int n1, const F f)
 {
   const int i01 = blockIdx.z;
-  const int i0 = i01/n1;
-  const int i1 = i01%n1;
-  const int i2 = blockIdx.y;
-  const int i3 = blockIdx.x;
-  const int i4 = threadIdx.x;
-  f(i0,i1,i2,i3,i4);
+  const int i0  = i01 / n1;
+  const int i1  = i01 % n1;
+  const int i2  = blockIdx.y;
+  const int i3  = blockIdx.x;
+  const int i4  = threadIdx.x;
+  f(i0, i1, i2, i3, i4);
 }
 
 template <typename F>
-__global__ __launch_bounds__(GPU_MAX_THREADS)
-void gpuRun5x0(const int n1, const int n34, const int n4, const F f)
+__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun5x0(const int n1, const int n34, const int n4, const F f)
 {
-  const int i34 = blockIdx.x*blockDim.x+threadIdx.x;
+  const int i34 = blockIdx.x * blockDim.x + threadIdx.x;
   if (i34 < n34) {
     const int i01 = blockIdx.z;
-    const int i0 = i01/n1;
-    const int i1 = i01%n1;
-    const int i2 = blockIdx.y;
-    const int i3 = i34/n4;
-    const int i4 = i34%n4;
-    f(i0,i1,i2,i3,i4);
+    const int i0  = i01 / n1;
+    const int i1  = i01 % n1;
+    const int i2  = blockIdx.y;
+    const int i3  = i34 / n4;
+    const int i4  = i34 % n4;
+    f(i0, i1, i2, i3, i4);
   }
 }
 
 template <typename F>
 void gpuFor(const int n0, const int n1, const int n2, const int n3, const int n4, const F f)
 {
-  if ((n0 <= 0) || (n1 <= 0) || (n2 <= 0) || (n3 <= 0) || (n4 <= 0)) return;
-  const long nl01 = long(n0)*long(n1);
-  const long nl34 = long(n3)*long(n4);
-  assert(nl01*long(n2)*nl34 < long(INT_MAX));
+  if ((n0 <= 0) || (n1 <= 0) || (n2 <= 0) || (n3 <= 0) || (n4 <= 0)) {
+    return;
+  }
+  const long nl01 = long(n0) * long(n1);
+  const long nl34 = long(n3) * long(n4);
+  assert(nl01 * long(n2) * nl34 < long(INT_MAX));
 
   const int n34 = int(nl34);
   if (n4 > GPU_MAX_THREADS) {
     const int n01 = int(nl01);
-    const int b34 = (n34+GPU_MAX_THREADS-1)/GPU_MAX_THREADS;
-    const int t34 = (n34+b34-1)/b34;
-    gpuRun5x0<<<dim3(b34,n2,n01),t34>>>(n1,n34,n4,f);
-    CHECK(cudaGetLastError());
+    const int b34 = (n34 + GPU_MAX_THREADS - 1) / GPU_MAX_THREADS;
+    const int t34 = (n34 + b34 - 1) / b34;
+    gpuRun5x0<<<dim3(b34, n2, n01), t34>>>(n1, n34, n4, f);
+    GPU_Error_Check();
   } else if (n34 > GPU_MAX_THREADS) {
-    const int n01 = n0*n1;
-    gpuRun4x1<<<dim3(n3,n2,n01),n4>>>(n1,f);
-    CHECK(cudaGetLastError());
-  } else if (n2*n34 > GPU_MAX_THREADS) {
-    gpuRun3x2<<<dim3(n2,n1,n0),dim3(n4,n3)>>>(f);
-    CHECK(cudaGetLastError());
+    const int n01 = n0 * n1;
+    gpuRun4x1<<<dim3(n3, n2, n01), n4>>>(n1, f);
+    GPU_Error_Check();
+  } else if (n2 * n34 > GPU_MAX_THREADS) {
+    gpuRun3x2<<<dim3(n2, n1, n0), dim3(n4, n3)>>>(f);
+    GPU_Error_Check();
   } else {
-    gpuRun2x3<<<dim3(n1,n0),dim3(n4,n3,n2)>>>(f);
-    CHECK(cudaGetLastError());
+    gpuRun2x3<<<dim3(n1, n0), dim3(n4, n3, n2)>>>(f);
+    GPU_Error_Check();
   }
 }
 
-#define GPU_LAMBDA [=] __device__
+  #define GPU_LAMBDA [=] __device__
 
 #endif
diff --git a/src/utils/gpu_arrays_functions.cu b/src/utils/gpu_arrays_functions.cu
index 2111f0907..0a84ef64e 100644
--- a/src/utils/gpu_arrays_functions.cu
+++ b/src/utils/gpu_arrays_functions.cu
@@ -1,75 +1,62 @@
+#include <iostream>
+
+#include "../global/global_cuda.h"
 #include "../utils/error_handling.h"
 #include "../utils/gpu.hpp"
-#include "../global/global_cuda.h"
 #include "../utils/gpu_arrays_functions.h"
-#include <iostream>
 
+void Extend_GPU_Array_Real(Real **current_array_d, int current_size, int new_size, bool print_out)
+{
+  if (new_size <= current_size) {
+    return;
+  }
+  if (print_out) {
+    std::cout << " Extending GPU Array, size: " << current_size << "  new_size: " << new_size << std::endl;
+  }
 
-void Extend_GPU_Array_Real( Real **current_array_d, int current_size, int new_size, bool print_out ){
-  
-  if ( new_size <= current_size ) return;
-  if ( print_out ) std::cout << " Extending GPU Array, size: " << current_size << "  new_size: " << new_size << std::endl;
-  
   size_t global_free, global_total;
-  CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) );
+  GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total));
   cudaDeviceSynchronize();
-  #ifdef PRINT_GPU_MEMORY
-  printf( "ReAllocating GPU Memory:  %d  MB free \n", (int)  global_free/1000000);
-  #endif
-  
-  if ( global_free < new_size*sizeof(Real) ){
-    printf( "ERROR: Not enough global device memory \n" );
-    printf( " Available Memory: %d  MB \n", (int) (global_free/1000000)  );
-    printf( " Requested Memory: %d  MB \n", (int) (new_size*sizeof(Real)/1000000)  );
+#ifdef PRINT_GPU_MEMORY
+  printf("ReAllocating GPU Memory:  %d  MB free \n", (int)global_free / 1000000);
+#endif
+
+  if (global_free < new_size * sizeof(Real)) {
+    printf("ERROR: Not enough global device memory \n");
+    printf(" Available Memory: %d  MB \n", (int)(global_free / 1000000));
+    printf(" Requested Memory: %d  MB \n", (int)(new_size * sizeof(Real) / 1000000));
     // exit(-1);
   }
-  
+
   Real *new_array_d;
-  CudaSafeCall( cudaMalloc((void**)&new_array_d,  new_size*sizeof(Real)) );
+  GPU_Error_Check(cudaMalloc((void **)&new_array_d, new_size * sizeof(Real)));
   cudaDeviceSynchronize();
-  CudaCheckError();
-  if ( new_array_d == NULL ){
+  GPU_Error_Check();
+  if (new_array_d == NULL) {
     std::cout << " Error When Allocating New GPU Array" << std::endl;
     chexit(-1);
   }
-  
+
   // Copy the content of the original array to the new array
-  CudaSafeCall( cudaMemcpy( new_array_d, *current_array_d, current_size*sizeof(Real), cudaMemcpyDeviceToDevice ) );	
+  GPU_Error_Check(cudaMemcpy(new_array_d, *current_array_d, current_size * sizeof(Real), cudaMemcpyDeviceToDevice));
   cudaDeviceSynchronize();
-  CudaCheckError();
-    
+  GPU_Error_Check();
+
   // size_t global_free_before, global_free_after;
-  // CudaSafeCall( cudaMemGetInfo( &global_free_before, &global_total ) );
+  // GPU_Error_Check( cudaMemGetInfo( &global_free_before, &global_total ) );
   // cudaDeviceSynchronize();
-  
+
   // Free the original array
   cudaFree(*current_array_d);
   cudaDeviceSynchronize();
-  CudaCheckError();
-  
-  // CudaSafeCall( cudaMemGetInfo( &global_free_after, &global_total ) );
+  GPU_Error_Check();
+
+  // GPU_Error_Check( cudaMemGetInfo( &global_free_after, &global_total ) );
   // cudaDeviceSynchronize();
-  // 
-  // printf("Freed Memory: %d MB\n", (int) (global_free_after - global_free_before)/1000000 );
-  
+  //
+  // printf("Freed Memory: %d MB\n", (int) (global_free_after -
+  // global_free_before)/1000000 );
+
   // Replace the pointer of the original array with the new one
   *current_array_d = new_array_d;
-  
 }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/src/utils/gpu_arrays_functions.h b/src/utils/gpu_arrays_functions.h
index aae5fa2dc..f15b379ab 100644
--- a/src/utils/gpu_arrays_functions.h
+++ b/src/utils/gpu_arrays_functions.h
@@ -1,10 +1,58 @@
 #ifndef GPU_ARRAY_FUNCTIONS_H
 #define GPU_ARRAY_FUNCTIONS_H
 
-#include "../global/global.h"
+#include <iostream>
 
-void Extend_GPU_Array_Real( Real **current_array_d, int current_size, int new_size, bool print_out );
+#include "../global/global_cuda.h"
+#include "../utils/error_handling.h"
+#include "../utils/gpu.hpp"
+#include "../utils/gpu_arrays_functions.h"
 
+template <typename T>
+void Extend_GPU_Array(T **current_array_d, int current_size, int new_size, bool print_out)
+{
+  if (new_size <= current_size) {
+    return;
+  }
+  if (print_out) {
+    std::cout << " Extending GPU Array, size: " << current_size << "  new_size: " << new_size << std::endl;
+  }
 
+  size_t global_free, global_total;
+  GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total));
+  cudaDeviceSynchronize();
+#ifdef PRINT_GPU_MEMORY
+  printf("ReAllocating GPU Memory:  %ld  MB free \n", global_free / 1000000);
+#endif
 
-#endif
\ No newline at end of file
+  if (global_free < new_size * sizeof(T)) {
+    printf("ERROR: Not enough global device memory \n");
+    printf(" Available Memory: %ld  MB \n", global_free / 1000000);
+    printf(" Requested Memory: %ld  MB \n", new_size * sizeof(T) / 1000000);
+    exit(-1);
+  }
+
+  T *new_array_d;
+  GPU_Error_Check(cudaMalloc((void **)&new_array_d, new_size * sizeof(T)));
+  cudaDeviceSynchronize();
+  GPU_Error_Check();
+  if (new_array_d == NULL) {
+    std::cout << " Error When Allocating New GPU Array" << std::endl;
+    chexit(-1);
+  }
+
+  // Copy the content of the original array to the new array
+  GPU_Error_Check(cudaMemcpy(new_array_d, *current_array_d, current_size * sizeof(T), cudaMemcpyDeviceToDevice));
+  cudaDeviceSynchronize();
+  GPU_Error_Check();
+
+  // Free the original array
+  cudaFree(*current_array_d);
+  cudaDeviceSynchronize();
+  GPU_Error_Check();
+
+  // Replace the pointer of the original array with the new one
+  *current_array_d = new_array_d;
+}
+
+#endif
diff --git a/src/utils/hydro_utilities.cpp b/src/utils/hydro_utilities.cpp
index 7fa7c1894..bc649c75c 100644
--- a/src/utils/hydro_utilities.cpp
+++ b/src/utils/hydro_utilities.cpp
@@ -1,5 +1,6 @@
 #include "../utils/hydro_utilities.h"
 
-namespace hydro_utilities {
+namespace hydro_utilities
+{
 
-} // end namespace hydro_utilities
\ No newline at end of file
+}  // end namespace hydro_utilities
\ No newline at end of file
diff --git a/src/utils/hydro_utilities.h b/src/utils/hydro_utilities.h
index 51439ac29..1a464e899 100644
--- a/src/utils/hydro_utilities.h
+++ b/src/utils/hydro_utilities.h
@@ -14,60 +14,201 @@
 #include "../global/global.h"
 #include "../global/global_cuda.h"
 #include "../utils/gpu.hpp"
+#include "../utils/math_utilities.h"
+#include "../utils/mhd_utilities.h"
 
+/*!
+ * INDEX OF VARIABLES
+ * P : pressure
+ * vx, vy, vz : x, y, and z velocity
+ * d : density
+ * E : energy
+ * T : temperature
+ * mx, my, mz : x, y, and z momentum
+ * n : number density
+ */
+
+namespace hydro_utilities
+{
+
+inline __host__ __device__ Real Calc_Pressure_Primitive(Real const &E, Real const &d, Real const &vx, Real const &vy,
+                                                        Real const &vz, Real const &gamma, Real const &magnetic_x = 0.0,
+                                                        Real const &magnetic_y = 0.0, Real const &magnetic_z = 0.0)
+{
+  Real pressure = E - 0.5 * d * math_utils::SquareMagnitude(vx, vy, vz);
+
+#ifdef MHD
+  pressure -= mhd::utils::computeMagneticEnergy(magnetic_x, magnetic_y, magnetic_z);
+#endif  // MHD
+
+  return fmax((gamma - 1.) * pressure, TINY_NUMBER);
+}
+
+inline __host__ __device__ Real Calc_Pressure_Conserved(Real const &E, Real const &d, Real const &mx, Real const &my,
+                                                        Real const &mz, Real const &gamma, Real const &magnetic_x = 0.0,
+                                                        Real const &magnetic_y = 0.0, Real const &magnetic_z = 0.0)
+{
+  Real pressure = E - 0.5 * math_utils::SquareMagnitude(mx, my, mz) / d;
+
+#ifdef MHD
+  pressure -= mhd::utils::computeMagneticEnergy(magnetic_x, magnetic_y, magnetic_z);
+#endif  // MHD
+
+  return fmax((gamma - 1.) * pressure, TINY_NUMBER);
+}
+
+inline __host__ __device__ Real Calc_Temp(Real const &P, Real const &n)
+{
+  Real T = P * PRESSURE_UNIT / (n * KB);
+  return T;
+}
+
+/*!
+ * \brief Compute the temperature from the conserved variables
+ *
+ * \param[in] E The energy
+ * \param[in] d The density
+ * \param[in] mx The momentum in the X-direction
+ * \param[in] my The momentum in the Y-direction
+ * \param[in] mz The momentum in the Z-direction
+ * \param[in] gamma The adiabatic index
+ * \param[in] n The number density
+ * \param[in] magnetic_x The cell centered magnetic field in the X-direction
+ * \param[in] magnetic_y The cell centered magnetic field in the Y-direction
+ * \param[in] magnetic_z The cell centered magnetic field in the Z-direction
+ * \return Real The temperature of the gas in a cell
+ */
+inline __host__ __device__ Real Calc_Temp_Conserved(Real const E, Real const d, Real const mx, Real const my,
+                                                    Real const mz, Real const gamma, Real const n,
+                                                    Real const magnetic_x = 0.0, Real const magnetic_y = 0.0,
+                                                    Real const magnetic_z = 0.0)
+{
+  Real const P = Calc_Pressure_Conserved(E, d, mx, my, mz, gamma, magnetic_x, magnetic_y, magnetic_z);
+  return Calc_Temp(P, n);
+}
+
+#ifdef DE
+/*!
+ * \brief Compute the temperature when DE is turned on
+ *
+ * \param[in] gas_energy The total gas energy in the cell. This is the value stored in the grid at
+ * grid_enum::GasEnergy
+ * \param[in] gamma The adiabatic index
+ * \param[in] n The number density
+ * \return Real The temperature
+ */
+inline __host__ __device__ Real Calc_Temp_DE(Real const gas_energy, Real const gamma, Real const n)
+{
+  return gas_energy * (gamma - 1.0) * PRESSURE_UNIT / (n * KB);
+}
+#endif  // DE
+
+inline __host__ __device__ Real Calc_Energy_Primitive(Real const &P, Real const &d, Real const &vx, Real const &vy,
+                                                      Real const &vz, Real const &gamma, Real const &magnetic_x = 0.0,
+                                                      Real const &magnetic_y = 0.0, Real const &magnetic_z = 0.0)
+{
+  // Compute and return energy
+  Real energy = (fmax(P, TINY_NUMBER) / (gamma - 1.)) + 0.5 * d * math_utils::SquareMagnitude(vx, vy, vz);
+
+#ifdef MHD
+  energy += mhd::utils::computeMagneticEnergy(magnetic_x, magnetic_y, magnetic_z);
+#endif  // MHD
+
+  return energy;
+}
+
+inline __host__ __device__ Real Calc_Energy_Conserved(Real const &P, Real const &d, Real const &momentum_x,
+                                                      Real const &momentum_y, Real const &momentum_z, Real const &gamma,
+                                                      Real const &magnetic_x = 0.0, Real const &magnetic_y = 0.0,
+                                                      Real const &magnetic_z = 0.0)
+{
+  // Compute and return energy
+  Real energy = (fmax(P, TINY_NUMBER) / (gamma - 1.)) +
+                (0.5 / d) * math_utils::SquareMagnitude(momentum_x, momentum_y, momentum_z);
+
+#ifdef MHD
+  energy += mhd::utils::computeMagneticEnergy(magnetic_x, magnetic_y, magnetic_z);
+#endif  // MHD
+
+  return energy;
+}
+
+inline __host__ __device__ Real Get_Pressure_From_DE(Real const &E, Real const &U_total, Real const &U_advected,
+                                                     Real const &gamma)
+{
+  Real U, P;
+  Real eta = DE_ETA_1;
+  // Apply same condition as Byan+2013 to select the internal energy from which
+  // compute pressure.
+  if (U_total / E > eta) {
+    U = U_total;
+  } else {
+    U = U_advected;
+  }
+  P = U * (gamma - 1.0);
+  return fmax(P, (Real)TINY_NUMBER);
+  ;
+}
+
+/*!
+ * \brief Compute the kinetic energy from the density and velocities
+ *
+ * \param[in] d The density
+ * \param[in] vx The x velocity
+ * \param[in] vy The y velocity
+ * \param[in] vz The z velocity
+ * \return Real The kinetic energy
+ */
+inline __host__ __device__ Real Calc_Kinetic_Energy_From_Velocity(Real const &d, Real const &vx, Real const &vy,
+                                                                  Real const &vz)
+{
+  return 0.5 * d * math_utils::SquareMagnitude(vx, vy, vz);
+}
+
+/*!
+ * \brief Compute the kinetic energy from the density and momenta
+ *
+ * \param[in] d The density
+ * \param[in] mx The x momentum
+ * \param[in] my The y momentum
+ * \param[in] mz The z momentum
+ * \return Real The kinetic energy
+ */
+inline __host__ __device__ Real Calc_Kinetic_Energy_From_Momentum(Real const &d, Real const &mx, Real const &my,
+                                                                  Real const &mz)
+{
+  return (0.5 / d) * math_utils::SquareMagnitude(mx, my, mz);
+}
+
+/*!
+ * \brief Compute the sound speed in the cell from conserved variables
+ *
+ * \param E Energy
+ * \param d densidy
+ * \param mx x momentum
+ * \param my y momentum
+ * \param mz z momentum
+ * \param gamma adiabatic index
+ * \return Real The sound speed
+ */
+inline __host__ __device__ Real Calc_Sound_Speed(Real const &E, Real const &d, Real const &mx, Real const &my,
+                                                 Real const &mz, Real const &gamma)
+{
+  Real P = Calc_Pressure_Conserved(E, d, mx, my, mz, gamma);
+  return sqrt(gamma * P / d);
+}
 
 /*!
-* INDEX OF VARIABLES
-* P : pressure
-* vx, vy, vz : x, y, and z velocity
-* d : density
-* E : energy
-* T : temperature
-* mx, my, mz : x, y, and z momentum
-* n : number density
-*/
-
-namespace hydro_utilities {
-    inline __host__ __device__ Real Calc_Pressure_Primitive(Real const &E, Real const &d, Real const &vx, Real const &vy, Real const &vz, Real const &gamma) {
-        Real P;
-        P = (E - 0.5 * d * (vx*vx + vy*vy + vz*vz)) * (gamma - 1.0);
-        P = fmax(P, TINY_NUMBER);
-        return P;
-    }
-
-    inline __host__ __device__ Real Calc_Pressure_Conserved(Real const &E, Real const &d, Real const &mx, Real const &my, Real const &mz, Real const &gamma) {
-        Real P= (E - 0.5 * (mx*mx + my*my + mz*mz) / d) * (gamma - 1.);
-        return fmax(P, TINY_NUMBER);
-    }
-
-    inline __host__ __device__ Real Calc_Temp(Real const &P, Real const &n) {
-        Real T = P * PRESSURE_UNIT / (n * KB);
-        return T;
-    }
-
-    #ifdef DE
-    inline __host__ __device__ Real Calc_Temp_DE(Real const &d, Real const &ge, Real const &gamma, Real const&n) {
-        Real T =  d * ge * (gamma - 1.0) * PRESSURE_UNIT / (n * KB);
-        return T;
-    }
-    #endif // DE
-
-    inline __host__ __device__ Real Calc_Energy_Primitive(Real const &P, Real const &d, Real const &vx, Real const &vy, Real const &vz, Real const &gamma) {
-        // Compute and return energy
-        return (fmax(P, TINY_NUMBER)/(gamma - 1.)) + 0.5 * d * (vx*vx + vy*vy + vz*vz);
-    }
-
-    inline __host__ __device__ Real Get_Pressure_From_DE(Real const &E, Real const &U_total, Real const &U_advected, Real const &gamma) {
-        Real U, P;
-        Real eta = DE_ETA_1;
-        // Apply same condition as Byan+2013 to select the internal energy from which compute pressure.
-        if (U_total/E > eta) {
-            U = U_total;
-        } else {
-            U = U_advected;
-        }
-        P = U * (gamma - 1.0);
-        return P;
-    }
-
-}
\ No newline at end of file
+ * \brief Compute the sound in the cell from primitive variables
+ *
+ * \param P
+ * \param d
+ * \param gamma
+ * \return __host__
+ */
+inline __host__ __device__ Real Calc_Sound_Speed(Real const &P, Real const &d, Real const &gamma)
+{
+  return sqrt(gamma * P / d);
+}
+
+}  // namespace hydro_utilities
diff --git a/src/utils/hydro_utilities_tests.cpp b/src/utils/hydro_utilities_tests.cpp
index e8a066d12..fe0cbe9e6 100644
--- a/src/utils/hydro_utilities_tests.cpp
+++ b/src/utils/hydro_utilities_tests.cpp
@@ -1,129 +1,264 @@
 /*!
  * \file hyo_utilities_tests.cpp
- * \author Robert 'Bob' Caddy (rvc@pitt.edu), Helena Richie (helenarichie@pitt.edu)
- * \brief Tests for the contents of hydro_utilities.h and hydro_utilities.cpp
+ * \author Robert 'Bob' Caddy (rvc@pitt.edu), Helena Richie
+ * (helenarichie@pitt.edu) \brief Tests for the contents of hydro_utilities.h
+ * and hydro_utilities.cpp
  *
  */
 
 // STL Includes
-#include <vector>
-#include <string>
 #include <iostream>
+#include <string>
+#include <vector>
 
 // External Includes
-#include <gtest/gtest.h>    // Include GoogleTest and related libraries/headers
+#include <gtest/gtest.h>  // Include GoogleTest and related libraries/headers
 
 // Local Includes
-#include "../utils/testing_utilities.h"
-#include "../utils/hydro_utilities.h"
 #include "../global/global.h"
+#include "../utils/hydro_utilities.h"
+#include "../utils/testing_utilities.h"
 
 /*!
-* INDEX OF VARIABLES
-* P : pressure
-* vx, vy, vz : x, y, and z velocity
-* d : density
-* E : energy
-* T : temperature
-* mx, my, mz : x, y, and z momentum
-* n : number density
-*/
+ * INDEX OF VARIABLES
+ * P : pressure
+ * vx, vy, vz : x, y, and z velocity
+ * d : density
+ * E : energy
+ * T : temperature
+ * mx, my, mz : x, y, and z momentum
+ * n : number density
+ */
 
 // =============================================================================
 // Local helper functions
 
 namespace
 {
-    struct TestParams
-    {
-        double gamma = 5./3.;
-        std::vector<double> d {1.0087201154e-15, 1.0756968986e2, 1.0882403847e100};
-        std::vector<double> vx {1.0378624601e-100, 1.0829278656e2, 1.0800514112e100};
-        std::vector<double> vy {1.0583469014e-100, 1.0283073464e2, 1.0725717864e100};
-        std::vector<double> vz {1.0182972216e-100, 1.0417748226e2, 1.0855352639e100};
-        std::vector<double> mx {0.2340416681e-100, 0.1019429453e2, 0.5062596954e100};
-        std::vector<double> my {0.9924582299e-100, 0.1254780684e2, 0.5939640992e100};
-        std::vector<double> mz {0.6703192739e-100, 0.5676716066e2, 0.2115881803e100};
-        std::vector<double> E {20.9342082433e-90, 20.9976906577e10, 20.9487120853e300};
-        std::vector<double> P {2.2244082909e-10, 8.6772951021e2, 6.7261085663e100};
-        std::vector<double> n {3.0087201154e-10, 1.3847303413e2, 1.0882403847e100};
-        std::vector<double> ge {4.890374019e-10, 1.0756968986e2, 3.8740982372e100};
-        std::vector<double> U_total {2.389074039e-10, 4.890374019e2, 6.8731436293e100};
-        std::vector<double> U_advected {1.3847303413e-10, 1.0756968986e2, 1.0882403847e100};
-        std::vector<std::string> names{"Small number case", "Medium number case", "Large number case"};
-    };
+struct TestParams {
+  double gamma = 5. / 3.;
+  std::vector<double> d{1.0087201154e-15, 1.0756968986e2, 1.0882403847e100};
+  std::vector<double> vx{1.0378624601e-100, 1.0829278656e2, 1.0800514112e100};
+  std::vector<double> vy{1.0583469014e-100, 1.0283073464e2, 1.0725717864e100};
+  std::vector<double> vz{1.0182972216e-100, 1.0417748226e2, 1.0855352639e100};
+  std::vector<double> mx{0.2340416681e-100, 0.1019429453e2, 0.5062596954e100};
+  std::vector<double> my{0.9924582299e-100, 0.1254780684e2, 0.5939640992e100};
+  std::vector<double> mz{0.6703192739e-100, 0.5676716066e2, 0.2115881803e100};
+  std::vector<double> E{20.9342082433e-90, 20.9976906577e10, 20.9487120853e300};
+  std::vector<double> P{2.2244082909e-10, 8.6772951021e2, 6.7261085663e100};
+  std::vector<double> n{3.0087201154e-10, 1.3847303413e2, 1.0882403847e100};
+  std::vector<double> ge{4.890374019e-10, 1.0756968986e2, 3.8740982372e100};
+  std::vector<double> U_total{2.389074039e-10, 4.890374019e2, 6.8731436293e100};
+  std::vector<double> U_advected{1.3847303413e-10, 1.0756968986e2, 1.0882403847e100};
+  std::vector<double> pressureTotal{8.1704748693e-100, 2.6084125198e2, 1.8242151369e100};
+  std::vector<double> magnetic_x{2.8568843801e-100, 9.2400807786e2, 2.1621115264e100};
+  std::vector<double> magnetic_y{9.2900880344e-100, 8.0382409757e2, 6.6499532343e100};
+  std::vector<double> magnetic_z{9.5795678229e-100, 3.3284839263e2, 9.2337456649e100};
+  std::vector<std::string> names{"Small number case", "Medium number case", "Large number case"};
+};
+}  // namespace
+
+TEST(tHYDROtMHDHydroUtilsCalcPressurePrimitive, CorrectInputExpectCorrectOutput)
+{
+  TestParams parameters;
+#ifdef MHD
+  std::vector<double> fiducial_pressure{0, 139982878676.5015, 1.2697896247496674e+301};
+#else   // not MHD
+  std::vector<double> fiducial_pressure{1e-20, 139983415580.5549, 1.2697896247496674e+301};
+#endif  // MHD
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    Real test_Ps = hydro_utilities::Calc_Pressure_Primitive(
+        parameters.E.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i),
+        parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i));
+
+    testing_utilities::Check_Results(fiducial_pressure.at(i), test_Ps, parameters.names.at(i));
+  }
 }
 
-TEST(tHYDROHydroUtilsCalcPressurePrimitive, CorrectInputExpectCorrectOutput) {
-    TestParams parameters;
-    std::vector<double> fiducial_Ps {1e-20, 139983415580.5549, 1.2697896247496674e+301};
+TEST(tHYDROtMHDHydroUtilsCalcPressureConserved, CorrectInputExpectCorrectOutput)
+{
+  TestParams parameters;
+#ifdef MHD
+  std::vector<double> fiducial_pressure{0, 139984067469.81754, 1.3965808056866668e+301};
+#else   // not MHD
+  std::vector<double> fiducial_pressure{1e-20, 139984604373.87094, 1.3965808056866668e+301};
+#endif  // MHD
 
-    for (size_t i = 0; i < parameters.names.size(); i++)
-    {
-        Real test_Ps = hydro_utilities::Calc_Pressure_Primitive(parameters.E.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), parameters.gamma);
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    Real test_pressure = hydro_utilities::Calc_Pressure_Conserved(
+        parameters.E.at(i), parameters.d.at(i), parameters.mx.at(i), parameters.my.at(i), parameters.mz.at(i),
+        parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i));
 
-        testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, parameters.names.at(i));
-    }
+    testing_utilities::Check_Results(fiducial_pressure.at(i), test_pressure, parameters.names.at(i));
+  }
 }
 
-TEST(tHYDROHydroUtilsCalcPressureConserved, CorrectInputExpectCorrectOutput) {
-    TestParams parameters;
-    std::vector<double> fiducial_Ps {1e-20, 139984604373.87094, 1.3965808056866668e+301};
+TEST(tHYDROtMHDHydroUtilsCalcPressurePrimitive, NegativePressureExpectAutomaticFix)
+{
+  TestParams parameters;
 
-    for (size_t i = 0; i < parameters.names.size(); i++)
-    {
-        Real test_Ps = hydro_utilities::Calc_Pressure_Conserved(parameters.E.at(i), parameters.d.at(i), parameters.mx.at(i), parameters.my.at(i), parameters.mz.at(i), parameters.gamma);
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    Real test_pressure = hydro_utilities::Calc_Pressure_Primitive(
+        parameters.E.at(i), parameters.d.at(i), 1E4 * parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i),
+        parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i));
 
-        testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, parameters.names.at(i));
-    }
+    // I'm using the binary equality assertion here since in the case of
+    // negative pressure the function should return exactly TINY_NUMBER
+    EXPECT_EQ(TINY_NUMBER, test_pressure) << "Difference in " << parameters.names.at(i) << std::endl;
+  }
 }
 
-TEST(tHYDROHydroUtilsCalcTemp, CorrectInputExpectCorrectOutput) {
-    TestParams parameters;
-    std::vector<double> fiducial_Ts {3465185.0560059389, 29370603.906644326, 28968949.83344138};
+TEST(tHYDROtMHDHydroUtilsCalcPressureConserved, NegativePressureExpectAutomaticFix)
+{
+  TestParams parameters;
 
-    for (size_t i = 0; i < parameters.names.size(); i++)
-    {
-        Real test_Ts = hydro_utilities::Calc_Temp(parameters.P.at(i), parameters.n.at(i));
+  for (size_t i = 0; i < parameters.names.size() - 1; i++) {
+    Real test_pressure = hydro_utilities::Calc_Pressure_Conserved(
+        1E-10 * parameters.E.at(i), parameters.d.at(i), 1E4 * parameters.mx.at(i), 1E4 * parameters.my.at(i),
+        1E4 * parameters.mz.at(i), parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i),
+        parameters.magnetic_z.at(i));
 
-        testingUtilities::checkResults(fiducial_Ts.at(i), test_Ts, parameters.names.at(i));
-    }
+    // I'm using the binary equality assertion here since in the case of
+    // negative pressure the function should return exactly TINY_NUMBER
+    EXPECT_EQ(TINY_NUMBER, test_pressure) << "Difference in " << parameters.names.at(i) << std::endl;
+  }
+}
+
+TEST(tHYDROHydroUtilsCalcTemp, CorrectInputExpectCorrectOutput)
+{
+  TestParams parameters;
+  std::vector<double> fiducial_Ts{3465185.0560059389, 29370603.906644326, 28968949.83344138};
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    Real test_Ts = hydro_utilities::Calc_Temp(parameters.P.at(i), parameters.n.at(i));
+
+    testing_utilities::Check_Results(fiducial_Ts.at(i), test_Ts, parameters.names.at(i));
+  }
 }
 
 #ifdef DE
-TEST(tHYDROHydroUtilsCalcTempDE, CorrectInputExpectCorrectOutput) {
-    TestParams parameters;
-    std::vector<double> fiducial_Ts {5.123106988008801e-09, 261106139.02514684, 1.2105231166585662e+107};
+TEST(tHYDROHydroUtilsCalcTempDE, CorrectInputExpectCorrectOutput)
+{
+  TestParams parameters;
+  std::vector<double> fiducial_Ts{5.123106988008801e-09, 261106139.02514684, 1.2105231166585662e+107};
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    Real test_Ts =
+        hydro_utilities::Calc_Temp_DE(parameters.d.at(i) * parameters.ge.at(i), parameters.gamma, parameters.n.at(i));
 
-    for (size_t i = 0; i < parameters.names.size(); i++)
-    {
-        Real test_Ts = hydro_utilities::Calc_Temp_DE(parameters.d.at(i), parameters.ge.at(i), parameters.gamma, parameters.n.at(i));
+    testing_utilities::Check_Results(fiducial_Ts.at(i), test_Ts, parameters.names.at(i));
+  }
+}
+#endif  // DE
+
+TEST(tHYDROtMHDHydroUtilsCalcEnergyPrimitive, CorrectInputExpectCorrectOutput)
+{
+  TestParams parameters;
+#ifdef MHD
+  std::vector<double> fiducial_energy{3.3366124363499997e-10, 2589863.8420712831, 1.9018677140549926e+300};
+#else   // not MHD
+  std::vector<double> fiducial_energy{3.3366124363499997e-10, 1784507.7619407175, 1.9018677140549926e+300};
+#endif  // MHD
 
-        testingUtilities::checkResults(fiducial_Ts.at(i), test_Ts, parameters.names.at(i));
-    }
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    Real test_Es = hydro_utilities::Calc_Energy_Primitive(
+        parameters.P.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i),
+        parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i));
+
+    testing_utilities::Check_Results(fiducial_energy.at(i), test_Es, parameters.names.at(i));
+  }
 }
-#endif // DE
 
-TEST(tHYDROHydroUtilsCalcEnergyPrimitive, CorrectInputExpectCorrectOutput) {
-    TestParams parameters;
-    std::vector<double> fiducial_Es {3.3366124363499997e-10, 1784507.7619407175, 1.9018677140549926e+300};
+TEST(tHYDROtMHDHydroUtilsCalcEnergyConserved, CorrectInputExpectCorrectOutput)
+{
+  TestParams parameters;
+#ifdef MHD
+  std::vector<double> fiducial_energy{3.3366124363499997e-10, 806673.86799851817, 6.7079331637514162e+201};
+#else   // not MHD
+  std::vector<double> fiducial_energy{3.3366124363499997e-10, 1317.7878679524658, 1.0389584427972784e+101};
+#endif  // MHD
 
-    for (size_t i = 0; i < parameters.names.size(); i++)
-    {
-        Real test_Es = hydro_utilities::Calc_Energy_Primitive(parameters.P.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), parameters.gamma);
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    Real test_Es = hydro_utilities::Calc_Energy_Conserved(
+        parameters.P.at(i), parameters.d.at(i), parameters.mx.at(i), parameters.my.at(i), parameters.mz.at(i),
+        parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i));
 
-        testingUtilities::checkResults(fiducial_Es.at(i), test_Es, parameters.names.at(i));
-    }
+    testing_utilities::Check_Results(fiducial_energy.at(i), test_Es, parameters.names.at(i));
+  }
 }
 
-TEST(tHYDROHydroUtilsGetPressureFromDE, CorrectInputExpectCorrectOutput) {
-    TestParams parameters;
-    std::vector<double> fiducial_Ps {1.5927160260000002e-10, 71.713126573333341, 7.2549358980000001e+99};
+TEST(tHYDROtMHDHydroUtilsCalcEnergyPrimitive, NegativePressureExpectAutomaticFix)
+{
+  TestParams parameters;
+#ifdef MHD
+  std::vector<double> fiducial_energy{1.4999999999999998e-20, 2588562.2478059679, 1.9018677140549926e+300};
+#else   // not MHD
+  std::vector<double> fiducial_energy{0, 1783206.1676754025, 1.9018677140549926e+300};
+#endif  // MHD
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    Real test_Es = hydro_utilities::Calc_Energy_Primitive(
+        -parameters.P.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i),
+        parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i));
+
+    testing_utilities::Check_Results(fiducial_energy.at(i), test_Es, parameters.names.at(i));
+  }
+}
 
-    for (size_t i = 0; i < parameters.names.size(); i++)
-    {
-        Real test_Ps = hydro_utilities::Get_Pressure_From_DE(parameters.E.at(i), parameters.U_total.at(i), parameters.U_advected.at(i), parameters.gamma);
+TEST(tHYDROtMHDHydroUtilsCalcEnergyConserved, NegativePressureExpectAutomaticFix)
+{
+  TestParams parameters;
+#ifdef MHD
+  std::vector<double> fiducial_energy{0, 805372.27373320318, 6.7079331637514162e+201};
+#else   // not MHD
+  std::vector<double> fiducial_energy{0, 16.193602637465997, 3.0042157852278494e+99};
+#endif  // MHD
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    Real test_Es = hydro_utilities::Calc_Energy_Conserved(
+        -parameters.P.at(i), parameters.d.at(i), parameters.mx.at(i), parameters.my.at(i), parameters.mz.at(i),
+        parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i));
 
-        testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, parameters.names.at(i));
-    }
+    testing_utilities::Check_Results(fiducial_energy.at(i), test_Es, parameters.names.at(i));
+  }
 }
+
+TEST(tHYDROHydroUtilsGetPressureFromDE, CorrectInputExpectCorrectOutput)
+{
+  TestParams parameters;
+  std::vector<double> fiducial_Ps{1.5927160260000002e-10, 71.713126573333341, 7.2549358980000001e+99};
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    Real test_Ps = hydro_utilities::Get_Pressure_From_DE(parameters.E.at(i), parameters.U_total.at(i),
+                                                         parameters.U_advected.at(i), parameters.gamma);
+
+    testing_utilities::Check_Results(fiducial_Ps.at(i), test_Ps, parameters.names.at(i));
+  }
+}
+
+TEST(tHYDROtMHDCalcKineticEnergyFromVelocity, CorrectInputExpectCorrectOutput)
+{
+  TestParams parameters;
+  std::vector<double> fiducialEnergies{0.0, 6.307524975350106e-145, 1.9018677140549924e+150};
+  double const coef = 1E-50;
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    Real testEnergy = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(
+        coef * parameters.d.at(i), coef * parameters.vx.at(i), coef * parameters.vy.at(i), coef * parameters.vz.at(i));
+
+    testing_utilities::Check_Results(fiducialEnergies.at(i), testEnergy, parameters.names.at(i));
+  }
+}
+
+TEST(tHYDROtMHDCalcKineticEnergyFromMomentum, CorrectInputExpectCorrectOutput)
+{
+  TestParams parameters;
+  std::vector<double> fiducialEnergies{0.0, 0.0, 3.0042157852278499e+49};
+  double const coef = 1E-50;
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    Real testEnergy = hydro_utilities::Calc_Kinetic_Energy_From_Momentum(
+        coef * parameters.d.at(i), coef * parameters.mx.at(i), coef * parameters.my.at(i), coef * parameters.mz.at(i));
+
+    testing_utilities::Check_Results(fiducialEnergies.at(i), testEnergy, parameters.names.at(i));
+  }
+}
\ No newline at end of file
diff --git a/src/utils/math_utilities.h b/src/utils/math_utilities.h
new file mode 100644
index 000000000..68d13f19d
--- /dev/null
+++ b/src/utils/math_utilities.h
@@ -0,0 +1,101 @@
+/*!
+ * \file math_utilities.h
+ * \author Robert 'Bob' Caddy (rvc@pitt.edu)
+ * \brief Contains various functions for common mathematical operations
+ *
+ */
+
+#pragma once
+
+// STL Includes
+#include <cmath>
+#include <tuple>
+
+// External Includes
+
+// Local Includes
+#include "../global/global.h"
+#include "../global/global_cuda.h"
+#include "../utils/gpu.hpp"
+
+namespace math_utils
+{
+// =========================================================================
+/*!
+ * \brief Rotate cartesian coordinates. All arguments are cast to double
+ * then rotated. If the type is 'int' then the value is rounded to the
+ * nearest int
+ *
+ * \details Rotation such that when pitch=90 and yaw=0 x1_rot = -x3 and when
+ * pitch=0 and yaw=90 x1_rot = -x2
+ *
+ * \tparam T The return type
+ * \param[in] x_1 x1 coordinate
+ * \param[in] x_2 x2 coordinate
+ * \param[in] x_3 x3 coordinate
+ * \param[in] pitch Pitch angle in radians
+ * \param[in] yaw Yaw angle in radians
+ * \return std::tuple<T, T, T> The new, rotated, coordinates in the
+ * order <x1, x2, x2>. Intended to be captured with structured binding
+ */
+template <typename T>
+inline std::tuple<T, T, T> rotateCoords(Real const &x_1, Real const &x_2, Real const &x_3, Real const &pitch,
+                                        Real const &yaw)
+{
+  // Compute the sines and cosines. Correct for floating point errors if
+  // the angle is 0.5*M_PI
+  Real const sin_yaw   = std::sin(yaw);
+  Real const cos_yaw   = (yaw == 0.5 * M_PI) ? 0 : std::cos(yaw);
+  Real const sin_pitch = std::sin(pitch);
+  Real const cos_pitch = (pitch == 0.5 * M_PI) ? 0 : std::cos(pitch);
+
+  // Perform the rotation
+  Real const x_1_rot = (x_1 * cos_pitch * cos_yaw) + (x_2 * sin_yaw) + (x_3 * sin_pitch * cos_yaw);
+  Real const x_2_rot = (x_1 * cos_pitch * sin_yaw) + (x_2 * cos_yaw) + (x_3 * sin_pitch * sin_yaw);
+  Real const x_3_rot = (x_1 * sin_pitch) + (x_3 * cos_pitch);
+
+  if (std::is_same<T, int>::value) {
+    return {round(x_1_rot), round(x_2_rot), round(x_3_rot)};
+  } else if (std::is_same<T, Real>::value) {
+    return {x_1_rot, x_2_rot, x_3_rot};
+  }
+}
+// =========================================================================
+
+// =========================================================================
+/*!
+ * \brief Compute the dot product of a and b.
+ *
+ * \param[in] a1 The first element of a
+ * \param[in] a2 The second element of a
+ * \param[in] a3 The third element of a
+ * \param[in] b1 The first element of b
+ * \param[in] b2 The second element of b
+ * \param[in] b3 The third element of b
+ *
+ * \return Real The dot product of a and b
+ */
+inline __device__ __host__ Real dotProduct(Real const &a1, Real const &a2, Real const &a3, Real const &b1,
+                                           Real const &b2, Real const &b3)
+{
+  return a1 * b1 + ((a2 * b2) + (a3 * b3));
+};
+// =========================================================================
+
+// =========================================================================
+/*!
+ * \brief Compute the magnitude of a vector
+ *
+ * \param[in] v1 The first element of the vector
+ * \param[in] v2 The second element of the vector
+ * \param[in] v3 The third element of the vector
+ *
+ * \return Real The dot product of a and b
+ */
+inline __device__ __host__ Real SquareMagnitude(Real const &v1, Real const &v2, Real const &v3)
+{
+  return dotProduct(v1, v2, v3, v1, v2, v3);
+};
+// =========================================================================
+
+}  // namespace math_utils
diff --git a/src/utils/math_utilities_tests.cpp b/src/utils/math_utilities_tests.cpp
new file mode 100644
index 000000000..a49cd8a41
--- /dev/null
+++ b/src/utils/math_utilities_tests.cpp
@@ -0,0 +1,77 @@
+/*!
+ * \file math_utilities_tests.cpp
+ * \author Robert 'Bob' Caddy (rvc@pitt.edu)
+ * \brief Tests for the contents of math_utilities.h
+ *
+ */
+
+// STL Includes
+#include <math.h>
+
+// External Includes
+#include <gtest/gtest.h>  // Include GoogleTest and related libraries/headers
+
+// Local Includes
+#include "../global/global.h"
+#include "../utils/math_utilities.h"
+#include "../utils/testing_utilities.h"
+
+// =============================================================================
+TEST(tALLRotateCoords, CorrectInputExpectCorrectOutput)
+{
+  // Fiducial values
+  double const x_1         = 19.2497333410;
+  double const x_2         = 60.5197699003;
+  double const x_3         = 86.0613942621;
+  double const pitch       = 1.239 * M_PI;
+  double const yaw         = 0.171 * M_PI;
+  double const x_1_rot_fid = -31.565679455456568;
+  double const x_2_rot_fid = 14.745363873361605;
+  double const x_3_rot_fid = -76.05402749550727;
+
+  auto [x_1_rot, x_2_rot, x_3_rot] = math_utils::rotateCoords<double>(x_1, x_2, x_3, pitch, yaw);
+
+  testing_utilities::Check_Results<0>(x_1_rot_fid, x_1_rot, "x_1 rotated values");
+  testing_utilities::Check_Results<0>(x_2_rot_fid, x_2_rot, "x_2 rotated values");
+  testing_utilities::Check_Results<0>(x_3_rot_fid, x_3_rot, "x_3 rotated values");
+}
+// =============================================================================
+
+// =========================================================================
+/*!
+ * \brief Test the math_utils::dotProduct function
+ *
+ */
+TEST(tALLDotProduct, CorrectInputExpectCorrectOutput)
+{
+  std::vector<double> a{21.503067766457753, 48.316634031589935, 81.12177317622657},
+      b{38.504606872151484, 18.984145880030045, 89.52561861038686};
+
+  double const fiducialDotProduct = 9007.6941261535867;
+
+  double testDotProduct;
+
+  testDotProduct = math_utils::dotProduct(a.at(0), a.at(1), a.at(2), b.at(0), b.at(1), b.at(2));
+
+  // Now check results
+  testing_utilities::Check_Results(fiducialDotProduct, testDotProduct, "dot product");
+}
+// =========================================================================
+
+// =========================================================================
+/*!
+ * \brief Test the math_utils::dotProduct function
+ *
+ */
+TEST(tALLSquareMagnitude, CorrectInputExpectCorrectOutput)
+{
+  std::vector<double> a = {11.503067766457753, 98.316634031589935, 41.12177317622657};
+
+  double const fiducial_square_magnitude = 11489.481324498336;
+
+  double test_square_magnitude = math_utils::SquareMagnitude(a.at(0), a.at(1), a.at(2));
+
+  // Now check results
+  testing_utilities::Check_Results(fiducial_square_magnitude, test_square_magnitude, "dot product");
+}
+// =========================================================================
\ No newline at end of file
diff --git a/src/utils/mhd_utilities.cpp b/src/utils/mhd_utilities.cpp
deleted file mode 100644
index c7747830e..000000000
--- a/src/utils/mhd_utilities.cpp
+++ /dev/null
@@ -1,18 +0,0 @@
-/*!
- * \file mhd_utilities.cpp
- * \author Robert 'Bob' Caddy (rvc@pitt.edu)
- * \brief Contains the implementation of various utility functions for MHD
- *
- */
-
-// STL Includes
-
-// External Includes
-
-// Local Includes
-#include "../utils/mhd_utilities.h"
-
-namespace mhdUtils
-{
-
-} // end  namespace mhdUtils
\ No newline at end of file
diff --git a/src/utils/mhd_utilities.cu b/src/utils/mhd_utilities.cu
new file mode 100644
index 000000000..bceb4abc1
--- /dev/null
+++ b/src/utils/mhd_utilities.cu
@@ -0,0 +1,46 @@
+/*!
+ * \file mhd_utilities.cpp
+ * \author Robert 'Bob' Caddy (rvc@pitt.edu)
+ * \brief Contains the implementation of various utility functions for MHD and
+ * for the various kernels, functions, and tools required for the 3D VL+CT MHD
+ * integrator. Due to the CUDA/HIP compiler requiring that device functions be
+ * directly accessible to the file they're used in most device functions will be
+ * implemented in the header file
+ *
+ */
+
+// STL Includes
+
+// External Includes
+
+// Local Includes
+#include "../utils/mhd_utilities.h"
+
+namespace mhd::utils
+{
+#ifdef MHD
+void Init_Magnetic_Field_With_Vector_Potential(Header const &H, Grid3D::Conserved const &C,
+                                               std::vector<Real> const &vectorPotential)
+{
+  // Compute the magnetic field
+  for (size_t k = 1; k < H.nz; k++) {
+    for (size_t j = 1; j < H.ny; j++) {
+      for (size_t i = 1; i < H.nx; i++) {
+        // Get cell index. The "xmo" means: X direction Minus One
+        size_t const id    = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny);
+        size_t const idxmo = cuda_utilities::compute1DIndex(i - 1, j, k, H.nx, H.ny);
+        size_t const idymo = cuda_utilities::compute1DIndex(i, j - 1, k, H.nx, H.ny);
+        size_t const idzmo = cuda_utilities::compute1DIndex(i, j, k - 1, H.nx, H.ny);
+
+        C.magnetic_x[id] = (vectorPotential.at(id + 2 * H.n_cells) - vectorPotential.at(idymo + 2 * H.n_cells)) / H.dy -
+                           (vectorPotential.at(id + 1 * H.n_cells) - vectorPotential.at(idzmo + 1 * H.n_cells)) / H.dz;
+        C.magnetic_y[id] = (vectorPotential.at(id + 0 * H.n_cells) - vectorPotential.at(idzmo + 0 * H.n_cells)) / H.dz -
+                           (vectorPotential.at(id + 2 * H.n_cells) - vectorPotential.at(idxmo + 2 * H.n_cells)) / H.dx;
+        C.magnetic_z[id] = (vectorPotential.at(id + 1 * H.n_cells) - vectorPotential.at(idxmo + 1 * H.n_cells)) / H.dx -
+                           (vectorPotential.at(id + 0 * H.n_cells) - vectorPotential.at(idymo + 0 * H.n_cells)) / H.dy;
+      }
+    }
+  }
+}
+#endif  // MHD
+}  // end namespace mhd::utils
\ No newline at end of file
diff --git a/src/utils/mhd_utilities.h b/src/utils/mhd_utilities.h
index f28cbb400..f409fd4b0 100644
--- a/src/utils/mhd_utilities.h
+++ b/src/utils/mhd_utilities.h
@@ -8,287 +8,244 @@
 #pragma once
 
 // STL Includes
+#include <vector>
 
 // External Includes
 
 // Local Includes
 #include "../global/global.h"
 #include "../global/global_cuda.h"
+#include "../grid/grid3D.h"
+#include "../utils/cuda_utilities.h"
 #include "../utils/gpu.hpp"
+#include "../utils/math_utilities.h"
 
+namespace mhd::utils
+{
 /*!
- * \brief Namespace for MHD utilities
+ * \brief Namespace for functions required by functions within the mhd::utils
+ * namespace. Everything in this name space should be regarded as private
+ * but is made accesible for testing
  *
  */
-namespace mhdUtils
+namespace internal
 {
-    namespace // Anonymouse namespace
-    {
-        // =====================================================================
-        /*!
-         * \brief Compute the fast or slow magnetosonic wave speeds
-         *
-         * \param density The density
-         * \param gasPressure The gas pressure
-         * \param magneticX Magnetic field in the x-direction
-         * \param magneticY Magnetic field in the y-direction
-         * \param magneticZ Magnetic field in the z-direction
-         * \param gamma The adiabatic index
-         * \param waveChoice Which speed to compute. If +1 then compute the
-         * speed of the fast magnetosonic wave, if -1 then the speed of the slow
-         * magnetosonic wave
-         * \return Real The speed of the fast or slow magnetosonic wave
-         */
-        inline __host__ __device__ Real _magnetosonicSpeed(Real const &density,
-                                                           Real const &gasPressure,
-                                                           Real const &magneticX,
-                                                           Real const &magneticY,
-                                                           Real const &magneticZ,
-                                                           Real const &gamma,
-                                                           Real const &waveChoice)
-        {
-            // Compute the sound speed
-            Real bXSquared = magneticX * magneticX;
-            Real bSquared  = bXSquared + ((magneticY*magneticY) + (magneticZ*magneticZ));
-
-            Real term1 = gamma * gasPressure + bSquared;
-
-            Real term2 = (term1*term1) - 4. * gamma * gasPressure * bXSquared;
-            term2      = sqrt(term2);
-
-            return sqrt( (term1 + waveChoice * term2) / (2.0 * fmax(density, TINY_NUMBER)) );
-        }
-        // =====================================================================
-    }// Anonymouse namespace
+// =====================================================================
+/*!
+ * \brief Compute the fast or slow magnetosonic wave speeds
+ *
+ * \param density The density
+ * \param gasPressure The gas pressure
+ * \param magneticX Magnetic field in the x-direction
+ * \param magneticY Magnetic field in the y-direction
+ * \param magneticZ Magnetic field in the z-direction
+ * \param gamma The adiabatic index
+ * \param waveChoice Which speed to compute. If +1 then compute the
+ * speed of the fast magnetosonic wave, if -1 then the speed of the slow
+ * magnetosonic wave
+ * \return Real The speed of the fast or slow magnetosonic wave
+ */
+inline __host__ __device__ Real _magnetosonicSpeed(Real const &density, Real const &gasPressure, Real const &magneticX,
+                                                   Real const &magneticY, Real const &magneticZ, Real const &gamma,
+                                                   Real const &waveChoice)
+{
+  // Compute the sound speed
+  Real bXSquared = magneticX * magneticX;
+  Real bSquared  = bXSquared + ((magneticY * magneticY) + (magneticZ * magneticZ));
 
-    // =========================================================================
-    /*!
-     * \brief Compute the MHD energy in the cell
-     *
-     * \param[in] pressure The gas pressure
-     * \param[in] density The density
-     * \param[in] velocityX Velocity in the x-direction
-     * \param[in] velocityY Velocity in the y-direction
-     * \param[in] velocityZ Velocity in the z-direction
-     * \param[in] magneticX Magnetic field in the x-direction
-     * \param[in] magneticY Magnetic field in the y-direction
-     * \param[in] magneticZ Magnetic field in the z-direction
-     * \param[in] gamma The adiabatic index
-     * \return Real The energy within a cell
-     */
-    inline __host__ __device__ Real computeEnergy(Real const &pressure,
-                                                  Real const &density,
-                                                  Real const &velocityX,
-                                                  Real const &velocityY,
-                                                  Real const &velocityZ,
-                                                  Real const &magneticX,
-                                                  Real const &magneticY,
-                                                  Real const &magneticZ,
-                                                  Real const &gamma)
-    {
-        // Compute and return energy
-        return (fmax(pressure,TINY_NUMBER)/(gamma - 1.))
-                + 0.5 * density * (velocityX*velocityX + ((velocityY*velocityY) + (velocityZ*velocityZ)))
-                + 0.5 * (magneticX*magneticX + ((magneticY*magneticY) + (magneticZ*magneticZ)));
-    }
-    // =========================================================================
+  Real term1 = gamma * gasPressure + bSquared;
 
-    // =========================================================================
-    /*!
-     * \brief Compute the MHD gas pressure in a cell
-     *
-     * \param[in] energy The energy
-     * \param[in] density The density
-     * \param[in] momentumX Momentum in the x-direction
-     * \param[in] momentumY Momentum in the y-direction
-     * \param[in] momentumZ Momentum in the z-direction
-     * \param[in] magneticX Magnetic field in the x-direction
-     * \param[in] magneticY Magnetic field in the y-direction
-     * \param[in] magneticZ Magnetic field in the z-direction
-     * \param[in] gamma The adiabatic index
-     * \return Real The gas pressure in a cell
-     */
-    inline __host__ __device__ Real computeGasPressure(Real const &energy,
-                                                       Real const &density,
-                                                       Real const &momentumX,
-                                                       Real const &momentumY,
-                                                       Real const &momentumZ,
-                                                       Real const &magneticX,
-                                                       Real const &magneticY,
-                                                       Real const &magneticZ,
-                                                       Real const &gamma)
-    {
-        Real pressure = (gamma - 1.)
-                            * (energy
-                                - 0.5 * (momentumX*momentumX + ((momentumY*momentumY) + (momentumZ*momentumZ))) / density
-                                - 0.5 * (magneticX*magneticX + ((magneticY*magneticY) + (magneticZ*magneticZ))));
+  Real term2 = (term1 * term1) - 4. * gamma * gasPressure * bXSquared;
+  term2      = sqrt(term2);
 
-        return fmax(pressure, TINY_NUMBER);
-    }
-    // =========================================================================
+  return sqrt((term1 + waveChoice * term2) / (2.0 * fmax(density, TINY_NUMBER)));
+}
+// =====================================================================
+}  // namespace internal
 
-    // =========================================================================
-    /*!
-     * \brief Compute the MHD thermal energy in a cell
-     *
-     * \param[in] energyTot The total energy
-     * \param[in] density The density
-     * \param[in] momentumX Momentum in the x-direction
-     * \param[in] momentumY Momentum in the y-direction
-     * \param[in] momentumZ Momentum in the z-direction
-     * \param[in] magneticX Magnetic field in the x-direction
-     * \param[in] magneticY Magnetic field in the y-direction
-     * \param[in] magneticZ Magnetic field in the z-direction
-     * \param[in] gamma The adiabatic index
-     * \return Real The thermal energy in a cell
-     */
-    inline __host__ __device__ Real computeThermalEnergy(Real const &energyTot,
-                                                         Real const &density,
-                                                         Real const &momentumX,
-                                                         Real const &momentumY,
-                                                         Real const &momentumZ,
-                                                         Real const &magneticX,
-                                                         Real const &magneticY,
-                                                         Real const &magneticZ,
-                                                         Real const &gamma)
-    {
-        return energyTot - 0.5 * (momentumX*momentumX + ((momentumY*momentumY) + (momentumZ*momentumZ))) / fmax(density,TINY_NUMBER)
-                         - 0.5 * (magneticX*magneticX + ((magneticY*magneticY) + (magneticZ*magneticZ)));
-    }
-    // =========================================================================
+// =========================================================================
+/*!
+ * \brief Compute the magnetic energy
+ *
+ * \param[in] magneticX The magnetic field in the X-direction
+ * \param[in] magneticY The magnetic field in the Y-direction
+ * \param[in] magneticZ The magnetic field in the Z-direction
+ * \return Real The magnetic energy
+ */
+inline __host__ __device__ Real computeMagneticEnergy(Real const &magneticX, Real const &magneticY,
+                                                      Real const &magneticZ)
+{
+  return 0.5 * math_utils::SquareMagnitude(magneticX, magneticY, magneticZ);
+}
+// =========================================================================
 
-    // =========================================================================
-    /*!
-     * \brief Compute the total MHD pressure. I.e. magnetic pressure + gas
-     * pressure
-     *
-     * \param[in] gasPressure The gas pressure
-     * \param[in] magneticX Magnetic field in the x-direction
-     * \param[in] magneticY Magnetic field in the y-direction
-     * \param[in] magneticZ Magnetic field in the z-direction
-     * \return Real The total MHD pressure
-     */
-    inline __host__ __device__ Real computeTotalPressure(Real const &gasPressure,
-                                                         Real const &magneticX,
-                                                         Real const &magneticY,
-                                                         Real const &magneticZ)
-    {
-        Real pTot =  gasPressure + 0.5 * (magneticX*magneticX + ((magneticY*magneticY) + (magneticZ*magneticZ)));
+// =========================================================================
+/*!
+ * \brief Compute the MHD thermal energy in a cell
+ *
+ * \param[in] energyTot The total energy
+ * \param[in] density The density
+ * \param[in] momentumX Momentum in the x-direction
+ * \param[in] momentumY Momentum in the y-direction
+ * \param[in] momentumZ Momentum in the z-direction
+ * \param[in] magneticX Magnetic field in the x-direction
+ * \param[in] magneticY Magnetic field in the y-direction
+ * \param[in] magneticZ Magnetic field in the z-direction
+ * \param[in] gamma The adiabatic index
+ * \return Real The thermal energy in a cell
+ */
+inline __host__ __device__ Real computeThermalEnergy(Real const &energyTot, Real const &density, Real const &momentumX,
+                                                     Real const &momentumY, Real const &momentumZ,
+                                                     Real const &magneticX, Real const &magneticY,
+                                                     Real const &magneticZ, Real const &gamma)
+{
+  return energyTot - 0.5 * math_utils::SquareMagnitude(momentumX, momentumY, momentumZ) / fmax(density, TINY_NUMBER) -
+         computeMagneticEnergy(magneticX, magneticY, magneticZ);
+}
+// =========================================================================
 
-        return fmax(pTot, TINY_NUMBER);
-    }
-    // =========================================================================
+// =========================================================================
+/*!
+ * \brief Compute the total MHD pressure. I.e. magnetic pressure + gas
+ * pressure
+ *
+ * \param[in] gasPressure The gas pressure
+ * \param[in] magneticX Magnetic field in the x-direction
+ * \param[in] magneticY Magnetic field in the y-direction
+ * \param[in] magneticZ Magnetic field in the z-direction
+ * \return Real The total MHD pressure
+ */
+inline __host__ __device__ Real computeTotalPressure(Real const &gasPressure, Real const &magneticX,
+                                                     Real const &magneticY, Real const &magneticZ)
+{
+  Real pTot = gasPressure + computeMagneticEnergy(magneticX, magneticY, magneticZ);
 
-    // =========================================================================
-    /*!
-     * \brief Compute the speed of the fast magnetosonic wave
-     *
-     * \param density The gas pressure
-     * \param pressure The density
-     * \param magneticX Magnetic field in the x-direction
-     * \param magneticY Magnetic field in the y-direction
-     * \param magneticZ Magnetic field in the z-direction
-     * \param gamma The adiabatic index
-     * \return Real The speed of the fast magnetosonic wave
-     */
-    inline __host__ __device__ Real fastMagnetosonicSpeed(Real const &density,
-                                                          Real const &pressure,
-                                                          Real const &magneticX,
-                                                          Real const &magneticY,
-                                                          Real const &magneticZ,
-                                                          Real const &gamma)
-    {
-        // Compute the sound speed
-        return _magnetosonicSpeed(density,
-                                  pressure,
-                                  magneticX,
-                                  magneticY,
-                                  magneticZ,
-                                  gamma,
-                                  1.0);
-    }
-    // =========================================================================
+  return fmax(pTot, TINY_NUMBER);
+}
+// =========================================================================
 
-    // =========================================================================
-    /*!
-     * \brief Compute the speed of the slow magnetosonic wave
-     *
-     * \param density The gas pressure
-     * \param pressure The density
-     * \param magneticX Magnetic field in the x-direction
-     * \param magneticY Magnetic field in the y-direction
-     * \param magneticZ Magnetic field in the z-direction
-     * \param gamma The adiabatic index
-     * \return Real The speed of the slow magnetosonic wave
-     */
-    inline __host__ __device__ Real slowMagnetosonicSpeed(Real const &density,
-                                                          Real const &pressure,
-                                                          Real const &magneticX,
-                                                          Real const &magneticY,
-                                                          Real const &magneticZ,
-                                                          Real const &gamma)
-    {
-        // Compute the sound speed
-        return _magnetosonicSpeed(density,
-                                  pressure,
-                                  magneticX,
-                                  magneticY,
-                                  magneticZ,
-                                  gamma,
-                                  -1.0);
-    }
-    // =========================================================================
+// =========================================================================
+/*!
+ * \brief Compute the speed of the fast magnetosonic wave
+ *
+ * \param density The gas pressure
+ * \param pressure The density
+ * \param magneticX Magnetic field in the x-direction
+ * \param magneticY Magnetic field in the y-direction
+ * \param magneticZ Magnetic field in the z-direction
+ * \param gamma The adiabatic index
+ * \return Real The speed of the fast magnetosonic wave
+ */
+inline __host__ __device__ Real fastMagnetosonicSpeed(Real const &density, Real const &pressure, Real const &magneticX,
+                                                      Real const &magneticY, Real const &magneticZ, Real const &gamma)
+{
+  // Compute the sound speed
+  return mhd::utils::internal::_magnetosonicSpeed(density, pressure, magneticX, magneticY, magneticZ, gamma, 1.0);
+}
+// =========================================================================
 
-    // =========================================================================
-        /*!
-     * \brief Compute the speed of the Alfven wave in a cell
-     *
-     * \param[in] magneticX The magnetic field in the x direction, ie the direction
-     * along with the Riemann solver is acting
-     * \param[in] density The density in the cell
-     * \return Real The Alfven wave speed
-     */
-    inline __host__ __device__ Real alfvenSpeed(Real const &magneticX,
-                                                Real const &density)
-    {
-        // Compute the Alfven wave speed
-        return fabs(magneticX) / sqrt(fmax(density,TINY_NUMBER));
-    }
-    // =========================================================================
+// =========================================================================
+/*!
+ * \brief Compute the speed of the slow magnetosonic wave
+ *
+ * \param density The gas pressure
+ * \param pressure The density
+ * \param magneticX Magnetic field in the x-direction
+ * \param magneticY Magnetic field in the y-direction
+ * \param magneticZ Magnetic field in the z-direction
+ * \param gamma The adiabatic index
+ * \return Real The speed of the slow magnetosonic wave
+ */
+inline __host__ __device__ Real slowMagnetosonicSpeed(Real const &density, Real const &pressure, Real const &magneticX,
+                                                      Real const &magneticY, Real const &magneticZ, Real const &gamma)
+{
+  // Compute the sound speed
+  return mhd::utils::internal::_magnetosonicSpeed(density, pressure, magneticX, magneticY, magneticZ, gamma, -1.0);
+}
+// =========================================================================
 
-    // =========================================================================
-    /*!
-     * \brief Compute the cell centered average of the magnetic fields in a
-     * given cell
-     *
-     * \param[in] dev_conserved A pointer to the device array of conserved variables
-     * \param[in] id The 1D index into each grid subarray.
-     * \param[in] xid The x index
-     * \param[in] yid The y index
-     * \param[in] zid The z index
-     * \param[in] n_cells The total number of cells
-     * \param[in] nx The number of cells in the x-direction
-     * \param[in] ny The number of cells in the y-direction
-     * \param[out] avgBx The cell centered average magnetic field in the x-direction
-     * \param[out] avgBy The cell centered average magnetic field in the y-direction
-     * \param[out] avgBz The cell centered average magnetic field in the z-direction
-     */
-    inline __host__ __device__ void cellCenteredMagneticFields(Real const *dev_conserved,
-                                                               size_t const &id,
-                                                               size_t const &xid,
-                                                               size_t const &yid,
-                                                               size_t const &zid,
-                                                               size_t const &n_cells,
-                                                               size_t const &nx,
-                                                               size_t const &ny,
-                                                               Real &avgBx,
-                                                               Real &avgBy,
-                                                               Real &avgBz)
-    {
-        avgBx = 0.5 * (dev_conserved[(5+NSCALARS)*n_cells + id] + dev_conserved[(5+NSCALARS)*n_cells + ((xid-1) + yid*nx     + zid*nx*ny)]);
-        avgBy = 0.5 * (dev_conserved[(6+NSCALARS)*n_cells + id] + dev_conserved[(6+NSCALARS)*n_cells + (xid     + (yid-1)*nx + zid*nx*ny)]);
-        avgBz = 0.5 * (dev_conserved[(7+NSCALARS)*n_cells + id] + dev_conserved[(7+NSCALARS)*n_cells + (xid     + yid*nx     + (zid-1)*nx*ny)]);
-    }
-    // =========================================================================
+// =========================================================================
+/*!
+ * \brief Compute the speed of the Alfven wave in a cell
+ *
+ * \param[in] magneticX The magnetic field in the x direction, ie the direction
+ * along with the Riemann solver is acting
+ * \param[in] density The density in the cell
+ * \return Real The Alfven wave speed
+ */
+inline __host__ __device__ Real alfvenSpeed(Real const &magneticX, Real const &density)
+{
+  // Compute the Alfven wave speed
+  return fabs(magneticX) / sqrt(fmax(density, TINY_NUMBER));
+}
+// =========================================================================
 
-} // end  namespace mhdUtils
\ No newline at end of file
+// =========================================================================
+#ifdef MHD
+/*!
+ * \brief Compute the cell centered average of the magnetic fields in a
+ * given cell
+ *
+ * \param[in] dev_conserved A pointer to the device array of conserved variables
+ * \param[in] id The 1D index into each grid subarray.
+ * \param[in] xid The x index
+ * \param[in] yid The y index
+ * \param[in] zid The z index
+ * \param[in] n_cells The total number of cells
+ * \param[in] nx The number of cells in the x-direction
+ * \param[in] ny The number of cells in the y-direction
+ * \param[out] avgBx The cell centered average magnetic field in the x-direction
+ * \param[out] avgBy The cell centered average magnetic field in the y-direction
+ * \param[out] avgBz The cell centered average magnetic field in the z-direction
+ *
+ * \return Real local struct with the X, Y, and Z cell centered magnetic
+ * fields. Intended to be called with structured binding like `auto [x, y,
+ * z] = mhd::utils::cellCenteredMagneticFields(*args*)
+ */
+inline __host__ __device__ auto cellCenteredMagneticFields(Real const *dev_conserved, size_t const &id,
+                                                           size_t const &xid, size_t const &yid, size_t const &zid,
+                                                           size_t const &n_cells, size_t const &nx, size_t const &ny)
+{
+  // Ternary operator to check that no values outside of the magnetic field
+  // arrays are loaded. If the cell is on the edge that doesn't have magnetic
+  // fields on both sides then instead set the centered magnetic field to be
+  // equal to the magnetic field of the closest edge.
+  Real avgBx = (xid > 0) ?
+                         /*if true*/ 0.5 * (dev_conserved[(grid_enum::magnetic_x)*n_cells + id] +
+                                            dev_conserved[(grid_enum::magnetic_x)*n_cells +
+                                                          cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny)])
+                         :
+                         /*if false*/ dev_conserved[(grid_enum::magnetic_x)*n_cells + id];
+  Real avgBy = (yid > 0) ?
+                         /*if true*/ 0.5 * (dev_conserved[(grid_enum::magnetic_y)*n_cells + id] +
+                                            dev_conserved[(grid_enum::magnetic_y)*n_cells +
+                                                          cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny)])
+                         :
+                         /*if false*/ dev_conserved[(grid_enum::magnetic_y)*n_cells + id];
+  Real avgBz = (zid > 0) ?
+                         /*if true*/ 0.5 * (dev_conserved[(grid_enum::magnetic_z)*n_cells + id] +
+                                            dev_conserved[(grid_enum::magnetic_z)*n_cells +
+                                                          cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny)])
+                         :
+                         /*if false*/ dev_conserved[(grid_enum::magnetic_z)*n_cells + id];
+
+  struct ReturnStruct {
+    Real x, y, z;
+  };
+  return ReturnStruct{avgBx, avgBy, avgBz};
+}
+// =========================================================================
+
+// =========================================================================
+/*!
+ * \brief Initialize the magnitice field from the vector potential
+ *
+ * \param H The Header struct
+ * \param C The Conserved struct
+ * \param vectorPotential The vector potential in the same format as the other arrays in Cholla
+ */
+void Init_Magnetic_Field_With_Vector_Potential(Header const &H, Grid3D::Conserved const &C,
+                                               std::vector<Real> const &vectorPotential);
+// =========================================================================
+#endif  // MHD
+}  // end namespace mhd::utils
diff --git a/src/utils/mhd_utilities_tests.cpp b/src/utils/mhd_utilities_tests.cpp
deleted file mode 100644
index c5cbb25fb..000000000
--- a/src/utils/mhd_utilities_tests.cpp
+++ /dev/null
@@ -1,509 +0,0 @@
-/*!
- * \file mhd_utilities_tests.cpp
- * \author Robert 'Bob' Caddy (rvc@pitt.edu)
- * \brief Tests for the contents of mhd_utilities.h and mhd_utilities.cpp
- *
- */
-
-// STL Includes
-#include <vector>
-#include <string>
-#include <iostream>
-#include <numeric>
-#include <cmath>
-
-// External Includes
-#include <gtest/gtest.h>    // Include GoogleTest and related libraries/headers
-
-// Local Includes
-#include "../utils/testing_utilities.h"
-#include "../utils/mhd_utilities.h"
-#include "../global/global.h"
-
-// =============================================================================
-// Local helper functions
-namespace
-{
-    struct testParams
-    {
-        double gamma = 5./3.;
-        std::vector<double> density      {8.4087201154e-100, 1.6756968986e2, 5.4882403847e100};
-        std::vector<double> velocityX    {7.0378624601e-100, 7.0829278656e2, 1.8800514112e100};
-        std::vector<double> velocityY    {7.3583469014e-100, 5.9283073464e2, 5.2725717864e100};
-        std::vector<double> velocityZ    {1.7182972216e-100, 8.8417748226e2, 1.5855352639e100};
-        std::vector<double> momentumX    {8.2340416681e-100, 8.1019429453e2, 5.5062596954e100};
-        std::vector<double> momentumY    {4.9924582299e-100, 7.1254780684e2, 6.5939640992e100};
-        std::vector<double> momentumZ    {3.6703192739e-100, 7.5676716066e2, 7.2115881803e100};
-        std::vector<double> energy       {3.0342082433e-100, 7.6976906577e2, 1.9487120853e100};
-        std::vector<double> pressureGas  {2.2244082909e-100, 8.6772951021e2, 6.7261085663e100};
-        std::vector<double> pressureTotal{8.1704748693e-100, 2.6084125198e2, 1.8242151369e100};
-        std::vector<double> magneticX    {2.8568843801e-100, 9.2400807786e2, 2.1621115264e100};
-        std::vector<double> magneticY    {9.2900880344e-100, 8.0382409757e2, 6.6499532343e100};
-        std::vector<double> magneticZ    {9.5795678229e-100, 3.3284839263e2, 9.2337456649e100};
-        std::vector<std::string> names{"Small number case", "Medium number case", "Large number case"};
-    };
-}
-// =============================================================================
-
-
-// =============================================================================
-// Tests for the mhdUtils::computeEnergy function
-// =============================================================================
-/*!
- * \brief Test the mhdUtils::computeEnergy function with the standard set of
- * parameters
- *
- */
-TEST(tMHDComputeEnergy,
-     CorrectInputExpectCorrectOutput)
-{
-    testParams parameters;
-    std::vector<double> fiducialEnergies{3.3366124363499995e-100,
-                                         137786230.15630624,
-                                         9.2884430880010847e+301};
-
-    for (size_t i = 0; i < parameters.names.size(); i++)
-    {
-        Real testEnergy = mhdUtils::computeEnergy(parameters.pressureGas.at(i),
-                                                  parameters.density.at(i),
-                                                  parameters.velocityX.at(i),
-                                                  parameters.velocityY.at(i),
-                                                  parameters.velocityZ.at(i),
-                                                  parameters.magneticX.at(i),
-                                                  parameters.magneticY.at(i),
-                                                  parameters.magneticZ.at(i),
-                                                  parameters.gamma);
-
-        testingUtilities::checkResults(fiducialEnergies.at(i),
-                                       testEnergy,
-                                       parameters.names.at(i));
-    }
-}
-
-/*!
- * \brief Test the mhdUtils::computeEnergy function with a the standard set of
- * parameters except pressure is now negative
- *
- */
-TEST(tMHDComputeEnergy,
-     NegativePressureExpectAutomaticFix)
-{
-    testParams parameters;
-    std::vector<double> fiducialEnergies{3.3366124363499995e-100,
-                                         137784928.56204093,
-                                         9.2884430880010847e+301};
-
-    for (size_t i = 0; i < parameters.names.size(); i++)
-    {
-        Real testEnergy = mhdUtils::computeEnergy(-parameters.pressureGas.at(i),
-                                                  parameters.density.at(i),
-                                                  parameters.velocityX.at(i),
-                                                  parameters.velocityY.at(i),
-                                                  parameters.velocityZ.at(i),
-                                                  parameters.magneticX.at(i),
-                                                  parameters.magneticY.at(i),
-                                                  parameters.magneticZ.at(i),
-                                                  parameters.gamma);
-
-        testingUtilities::checkResults(fiducialEnergies.at(i),
-                                       testEnergy,
-                                       parameters.names.at(i));
-    }
-}
-// =============================================================================
-// End of tests for the mhdUtils::computeEnergy function
-// =============================================================================
-
-// =============================================================================
-// Tests for the mhdUtils::computeGasPressure function
-// =============================================================================
-/*!
- * \brief Test the mhdUtils::computeGasPressure function with the standard set of
- * parameters. Energy has been increased to avoid negative pressures
- *
- */
-TEST(tMHDComputeGasPressure,
-     CorrectInputExpectCorrectOutput)
-{
-    testParams parameters;
-    std::vector<double> energyMultiplier{3, 1.0E4, 1.0E105};
-    std::vector<double> fiducialGasPressures{1.8586864490415075e-100,
-                                             4591434.7663756227,
-                                             1.29869419465575e+205};
-
-    for (size_t i = 0; i < parameters.names.size(); i++)
-    {
-        Real testGasPressure = mhdUtils::computeGasPressure(energyMultiplier.at(i) * parameters.energy.at(i),
-                                                            parameters.density.at(i),
-                                                            parameters.momentumX.at(i),
-                                                            parameters.momentumY.at(i),
-                                                            parameters.momentumZ.at(i),
-                                                            parameters.magneticX.at(i),
-                                                            parameters.magneticY.at(i),
-                                                            parameters.magneticZ.at(i),
-                                                            parameters.gamma);
-
-        testingUtilities::checkResults(fiducialGasPressures.at(i),
-                                       testGasPressure,
-                                       parameters.names.at(i));
-    }
-}
-
-/*!
- * \brief Test the mhdUtils::computeGasPressure function with a the standard set
- * of parameters which produce negative pressures
- *
- */
-TEST(tMHDComputeGasPressure,
-     NegativePressureExpectAutomaticFix)
-{
-    testParams parameters;
-
-    for (size_t i = 0; i < parameters.names.size(); i++)
-    {
-        Real testGasPressure = mhdUtils::computeGasPressure(parameters.energy.at(i),
-                                                            parameters.density.at(i),
-                                                            parameters.momentumX.at(i),
-                                                            parameters.momentumY.at(i),
-                                                            parameters.momentumZ.at(i),
-                                                            parameters.magneticX.at(i),
-                                                            parameters.magneticY.at(i),
-                                                            parameters.magneticZ.at(i),
-                                                            parameters.gamma);
-
-        // I'm using the binary equality assertion here since in the case of
-        // negative pressure the function should return exactly TINY_NUMBER
-        EXPECT_EQ(TINY_NUMBER, testGasPressure)
-            << "Difference in " << parameters.names.at(i) << std::endl;
-    }
-}
-// =============================================================================
-// End of tests for the mhdUtils::computeGasPressure function
-// =============================================================================
-
-
-// =============================================================================
-// Tests for the mhdUtils::computeThermalEnergy function
-// =============================================================================
-/*!
- * \brief Test the mhdUtils::computeThermalEnergy function with the standard set
- * of parameters.
- *
- */
-TEST(tMHDComputeThermalEnergy,
-     CorrectInputExpectCorrectOutput)
-{
-    testParams parameters;
-    std::vector<double> energyMultiplier{1.0E85, 1.0E4, 1.0E105};
-    std::vector<double> fiducialGasPressures{3.0342082433e-15,
-                                             6887152.1495634327,
-                                             1.9480412919836246e+205};
-
-    for (size_t i = 0; i < parameters.names.size(); i++)
-    {
-        Real testGasPressure = mhdUtils::computeThermalEnergy(energyMultiplier.at(i) * parameters.energy.at(i),
-                                                              parameters.density.at(i),
-                                                              parameters.momentumX.at(i),
-                                                              parameters.momentumY.at(i),
-                                                              parameters.momentumZ.at(i),
-                                                              parameters.magneticX.at(i),
-                                                              parameters.magneticY.at(i),
-                                                              parameters.magneticZ.at(i),
-                                                              parameters.gamma);
-
-        testingUtilities::checkResults(fiducialGasPressures.at(i),
-                                       testGasPressure,
-                                       parameters.names.at(i));
-    }
-}
-// =============================================================================
-// End of tests for the mhdUtils::computeThermalEnergyfunction
-// =============================================================================
-
-// =============================================================================
-// Tests for the mhdUtils::computeTotalPressure function
-// =============================================================================
-/*!
- * \brief Test the mhdUtils::computeTotalPressure function with the standard set
- * of parameters.
- *
- */
-TEST(tMHDComputeTotalPressure,
-     CorrectInputExpectCorrectOutput)
-{
-    testParams parameters;
-    std::vector<double> fiducialTotalPressures{9.9999999999999995e-21,
-                                               806223.80964077567,
-                                               6.7079331637514151e+201};
-
-    for (size_t i = 0; i < parameters.names.size(); i++)
-    {
-        Real testTotalPressure = mhdUtils::computeTotalPressure(parameters.pressureGas.at(i),
-                                                                parameters.magneticX.at(i),
-                                                                parameters.magneticY.at(i),
-                                                                parameters.magneticZ.at(i));
-
-        testingUtilities::checkResults(fiducialTotalPressures.at(i),
-                                       testTotalPressure,
-                                       parameters.names.at(i));
-    }
-}
-
-/*!
- * \brief Test the mhdUtils::computeTotalPressure function with a the standard
- * set of parameters. Gas pressure has been multiplied and made negative to
- * generate negative total pressures
- *
- */
-TEST(tMHDComputeTotalPressure,
-     NegativePressureExpectAutomaticFix)
-{
-    testParams parameters;
-    std::vector<double> pressureMultiplier{1.0, -1.0e4, -1.0e105};
-
-    for (size_t i = 0; i < parameters.names.size(); i++)
-    {
-        Real testTotalPressure = mhdUtils::computeTotalPressure(pressureMultiplier.at(i) * parameters.pressureGas.at(i),
-                                                                parameters.magneticX.at(i),
-                                                                parameters.magneticY.at(i),
-                                                                parameters.magneticZ.at(i));
-
-        // I'm using the binary equality assertion here since in the case of
-        // negative pressure the function should return exactly TINY_NUMBER
-        EXPECT_EQ(TINY_NUMBER, testTotalPressure)
-            << "Difference in " << parameters.names.at(i) << std::endl;
-    }
-}
-// =============================================================================
-// End of tests for the mhdUtils::computeTotalPressure function
-// =============================================================================
-
-// =============================================================================
-// Tests for the mhdUtils::fastMagnetosonicSpeed function
-// =============================================================================
-/*!
- * \brief Test the mhdUtils::fastMagnetosonicSpeed function with the standard
- * set of parameters. All values are reduced by 1e-25 in the large number case
- * to avoid overflow
- *
- */
-TEST(tMHDFastMagnetosonicSpeed,
-     CorrectInputExpectCorrectOutput)
-{
-    testParams parameters;
-    std::vector<double> fiducialFastMagnetosonicSpeed{1.9254472601190615e-40,
-                                                      98.062482309387562,
-                                                      1.5634816865472293e+38};
-    std::vector<double> coef{1.0, 1.0, 1.0e-25};
-
-    for (size_t i = 0; i < parameters.names.size(); i++)
-    {
-        Real testFastMagnetosonicSpeed = mhdUtils::fastMagnetosonicSpeed(
-                                                coef.at(i)*parameters.density.at(i),
-                                                coef.at(i)*parameters.pressureGas.at(i),
-                                                coef.at(i)*parameters.magneticX.at(i),
-                                                coef.at(i)*parameters.magneticY.at(i),
-                                                coef.at(i)*parameters.magneticZ.at(i),
-                                                parameters.gamma);
-
-        testingUtilities::checkResults(fiducialFastMagnetosonicSpeed.at(i),
-                                       testFastMagnetosonicSpeed,
-                                       parameters.names.at(i));
-    }
-}
-
-/*!
- * \brief Test the mhdUtils::fastMagnetosonicSpeed function with the standard
- * set of parameters, density is negative. All values are reduced by 1e-25 in
- * the large number case to avoid overflow.
- *
- */
-TEST(tMHDFastMagnetosonicSpeed,
-     NegativeDensityExpectAutomaticFix)
-{
-    testParams parameters;
-    std::vector<double> fiducialFastMagnetosonicSpeed{1.9254472601190615e-40,
-                                                      12694062010603.15,
-                                                      1.1582688085027081e+86};
-    std::vector<double> coef{1.0, 1.0, 1.0e-25};
-
-    for (size_t i = 0; i < parameters.names.size(); i++)
-    {
-        Real testFastMagnetosonicSpeed = mhdUtils::fastMagnetosonicSpeed(
-                                                -coef.at(i)*parameters.density.at(i),
-                                                coef.at(i)*parameters.pressureGas.at(i),
-                                                coef.at(i)*parameters.magneticX.at(i),
-                                                coef.at(i)*parameters.magneticY.at(i),
-                                                coef.at(i)*parameters.magneticZ.at(i),
-                                                parameters.gamma);
-
-        testingUtilities::checkResults(fiducialFastMagnetosonicSpeed.at(i),
-                                       testFastMagnetosonicSpeed,
-                                       parameters.names.at(i));
-    }
-}
-// =============================================================================
-// End of tests for the mhdUtils::fastMagnetosonicSpeed function
-// =============================================================================
-
-// =============================================================================
-// Tests for the mhdUtils::slowMagnetosonicSpeed function
-// =============================================================================
-/*!
- * \brief Test the mhdUtils::slowMagnetosonicSpeed function with the standard
- * set of parameters. All values are reduced by 1e-25 in the large number case
- * to avoid overflow
- *
- */
-TEST(tMHDSlowMagnetosonicSpeed,
-     CorrectInputExpectCorrectOutput)
-{
-    testParams parameters;
-    std::vector<double> fiducialSlowMagnetosonicSpeed{0.0,
-                                                      2.138424778167535,
-                                                      0.26678309355540852};
-    // Coefficient to make sure the output is well defined and not nan or inf
-    double const coef = 1E-95;
-
-    for (size_t i = 2; i < parameters.names.size(); i++)
-    {
-        Real testSlowMagnetosonicSpeed = mhdUtils::slowMagnetosonicSpeed(
-                                                parameters.density.at(i) * coef,
-                                                parameters.pressureGas.at(i) * coef,
-                                                parameters.magneticX.at(i) * coef,
-                                                parameters.magneticY.at(i) * coef,
-                                                parameters.magneticZ.at(i) * coef,
-                                                parameters.gamma);
-
-        testingUtilities::checkResults(fiducialSlowMagnetosonicSpeed.at(i),
-                                       testSlowMagnetosonicSpeed,
-                                       parameters.names.at(i));
-    }
-}
-
-/*!
- * \brief Test the mhdUtils::slowMagnetosonicSpeed function with the standard
- * set of parameters, density is negative. All values are reduced by 1e-25 in
- * the large number case to avoid overflow.
- *
- */
-TEST(tMHDSlowMagnetosonicSpeed,
-     NegativeDensityExpectAutomaticFix)
-{
-    testParams parameters;
-    std::vector<double> fiducialSlowMagnetosonicSpeed{0.0,
-                                                      276816332809.37604,
-                                                      1976400098318.3574};
-    // Coefficient to make sure the output is well defined and not nan or inf
-    double const coef = 1E-95;
-
-    for (size_t i = 2; i < parameters.names.size(); i++)
-    {
-        Real testSlowMagnetosonicSpeed = mhdUtils::slowMagnetosonicSpeed(
-                                                -parameters.density.at(i) * coef,
-                                                parameters.pressureGas.at(i) * coef,
-                                                parameters.magneticX.at(i) * coef,
-                                                parameters.magneticY.at(i) * coef,
-                                                parameters.magneticZ.at(i) * coef,
-                                                parameters.gamma);
-
-        testingUtilities::checkResults(fiducialSlowMagnetosonicSpeed.at(i),
-                                       testSlowMagnetosonicSpeed,
-                                       parameters.names.at(i));
-    }
-}
-// =============================================================================
-// End of tests for the mhdUtils::slowMagnetosonicSpeed function
-// =============================================================================
-
-// =============================================================================
-// Tests for the mhdUtils::alfvenSpeed function
-// =============================================================================
-/*!
- * \brief Test the mhdUtils::alfvenSpeed function with the standard set of
- * parameters.
- *
- */
-TEST(tMHDAlfvenSpeed,
-     CorrectInputExpectCorrectOutput)
-{
-    testParams parameters;
-    std::vector<double> fiducialAlfvenSpeed{2.8568843800999998e-90,
-                                            71.380245120271113,
-                                            9.2291462785524423e+49};
-
-    for (size_t i = 0; i < parameters.names.size(); i++)
-    {
-        Real testAlfvenSpeed = mhdUtils::alfvenSpeed(parameters.magneticX.at(i),
-                                                     parameters.density.at(i));
-
-        testingUtilities::checkResults(fiducialAlfvenSpeed.at(i),
-                                       testAlfvenSpeed,
-                                       parameters.names.at(i));
-    }
-}
-
-/*!
- * \brief Test the mhdUtils::alfvenSpeed function with the standard set of
- * parameters except density is negative
- *
- */
-TEST(tMHDAlfvenSpeed,
-     NegativeDensityExpectAutomaticFix)
-{
-    testParams parameters;
-    std::vector<double> fiducialAlfvenSpeed{2.8568843800999998e-90,
-                                            9240080778600,
-                                            2.1621115263999998e+110};
-
-    for (size_t i = 0; i < parameters.names.size(); i++)
-    {
-        Real testAlfvenSpeed = mhdUtils::alfvenSpeed(parameters.magneticX.at(i),
-                                                     -parameters.density.at(i));
-
-        testingUtilities::checkResults(fiducialAlfvenSpeed.at(i),
-                                       testAlfvenSpeed,
-                                       parameters.names.at(i));
-    }
-}
-// =============================================================================
-// End of tests for the mhdUtils::alfvenSpeed function
-// =============================================================================
-
-// =============================================================================
-// Tests for the mhdUtils::cellCenteredMagneticFields function
-// =============================================================================
-TEST(tMHDCellCenteredMagneticFields,
-     CorrectInputExpectCorrectOutput)
-{
-    // Initialize the test grid and other state variables
-    size_t const nx = 3, ny = nx;
-    size_t const xid = std::floor(nx/2), yid = xid, zid = xid;
-    size_t const id = xid + yid*nx + zid*nx*ny;
-
-    size_t const n_cells = std::pow(5,3);
-    // Make sure the vector is large enough that the locations where the
-    // magnetic field would be in the real grid are filled
-    std::vector<double> testGrid(n_cells * (8+NSCALARS));
-    // Populate the grid with values where testGrid.at(i) = double(i). The
-    // values chosen aren't that important, just that every cell has a unique
-    // value
-    std::iota(std::begin(testGrid), std::end(testGrid), 0.);
-
-    // Fiducial and test variables
-    double const fiducialAvgBx = 637.5,
-                 fiducialAvgBy = 761.5,
-                 fiducialAvgBz = 883.5;
-    double testAvgBx, testAvgBy, testAvgBz;
-
-    // Call the function to test
-    mhdUtils::cellCenteredMagneticFields(testGrid.data(), id, xid, yid, zid, n_cells, nx, ny, testAvgBx, testAvgBy, testAvgBz);
-
-    // Check the results
-    testingUtilities::checkResults(fiducialAvgBx, testAvgBx, "cell centered Bx value");
-    testingUtilities::checkResults(fiducialAvgBy, testAvgBy, "cell centered By value");
-    testingUtilities::checkResults(fiducialAvgBz, testAvgBz, "cell centered Bz value");
-}
-// =============================================================================
-// End of tests for the mhdUtils::cellCenteredMagneticFields function
-// =============================================================================
diff --git a/src/utils/mhd_utilities_tests.cu b/src/utils/mhd_utilities_tests.cu
new file mode 100644
index 000000000..be9b48c78
--- /dev/null
+++ b/src/utils/mhd_utilities_tests.cu
@@ -0,0 +1,385 @@
+/*!
+ * \file mhd_utilities_tests.cpp
+ * \author Robert 'Bob' Caddy (rvc@pitt.edu)
+ * \brief Tests for the contents of mhd_utilities.h and mhd_utilities.cpp
+ *
+ */
+
+// STL Includes
+#include <cmath>
+#include <iostream>
+#include <numeric>
+#include <random>
+#include <string>
+#include <vector>
+
+// External Includes
+#include <gtest/gtest.h>  // Include GoogleTest and related libraries/headers
+
+// Local Includes
+#include "../global/global.h"
+#include "../grid/grid3D.h"
+#include "../utils/mhd_utilities.h"
+#include "../utils/testing_utilities.h"
+
+// =============================================================================
+// Local helper functions
+namespace
+{
+struct TestParams {
+  double gamma = 5. / 3.;
+  std::vector<double> density{8.4087201154e-100, 1.6756968986e2, 5.4882403847e100};
+  std::vector<double> velocityX{7.0378624601e-100, 7.0829278656e2, 1.8800514112e100};
+  std::vector<double> velocityY{7.3583469014e-100, 5.9283073464e2, 5.2725717864e100};
+  std::vector<double> velocityZ{1.7182972216e-100, 8.8417748226e2, 1.5855352639e100};
+  std::vector<double> momentumX{8.2340416681e-100, 8.1019429453e2, 5.5062596954e100};
+  std::vector<double> momentumY{4.9924582299e-100, 7.1254780684e2, 6.5939640992e100};
+  std::vector<double> momentumZ{3.6703192739e-100, 7.5676716066e2, 7.2115881803e100};
+  std::vector<double> energy{3.0342082433e-100, 7.6976906577e2, 1.9487120853e100};
+  std::vector<double> pressureGas{2.2244082909e-100, 8.6772951021e2, 6.7261085663e100};
+  std::vector<double> pressureTotal{8.1704748693e-100, 2.6084125198e2, 1.8242151369e100};
+  std::vector<double> magneticX{2.8568843801e-100, 9.2400807786e2, 2.1621115264e100};
+  std::vector<double> magneticY{9.2900880344e-100, 8.0382409757e2, 6.6499532343e100};
+  std::vector<double> magneticZ{9.5795678229e-100, 3.3284839263e2, 9.2337456649e100};
+  std::vector<std::string> names{"Small number case", "Medium number case", "Large number case"};
+};
+}  // namespace
+// =============================================================================
+
+// =============================================================================
+// Tests for the mhd::utils::computeThermalEnergy function
+// =============================================================================
+/*!
+ * \brief Test the mhd::utils::computeThermalEnergy function with the standard
+ * set of parameters.
+ *
+ */
+TEST(tMHDComputeThermalEnergy, CorrectInputExpectCorrectOutput)
+{
+  TestParams parameters;
+  std::vector<double> energyMultiplier{1.0E85, 1.0E4, 1.0E105};
+  std::vector<double> fiducialGasPressures{3.0342082433e-15, 6887152.1495634327, 1.9480412919836246e+205};
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    Real testGasPressure = mhd::utils::computeThermalEnergy(
+        energyMultiplier.at(i) * parameters.energy.at(i), parameters.density.at(i), parameters.momentumX.at(i),
+        parameters.momentumY.at(i), parameters.momentumZ.at(i), parameters.magneticX.at(i), parameters.magneticY.at(i),
+        parameters.magneticZ.at(i), parameters.gamma);
+
+    testing_utilities::Check_Results(fiducialGasPressures.at(i), testGasPressure, parameters.names.at(i));
+  }
+}
+// =============================================================================
+// End of tests for the mhd::utils::computeThermalEnergy function
+// =============================================================================
+
+// =============================================================================
+// Tests for the mhd::utils::computeMagneticEnergy function
+// =============================================================================
+/*!
+ * \brief Test the mhd::utils::computeMagneticEnergy function with the standard
+ * set of parameters.
+ *
+ */
+TEST(tMHDcomputeMagneticEnergy, CorrectInputExpectCorrectOutput)
+{
+  TestParams parameters;
+  std::vector<double> energyMultiplier{1.0E85, 1.0E4, 1.0E105};
+  std::vector<double> fiducialEnergy{0.0, 805356.08013056568, 6.7079331637514162e+201};
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    Real testMagneticEnergy = mhd::utils::computeMagneticEnergy(parameters.magneticX.at(i), parameters.magneticY.at(i),
+                                                                parameters.magneticZ.at(i));
+
+    testing_utilities::Check_Results(fiducialEnergy.at(i), testMagneticEnergy, parameters.names.at(i));
+  }
+}
+// =============================================================================
+// End of tests for the mhd::utils::computeMagneticEnergy function
+// =============================================================================
+
+// =============================================================================
+// Tests for the mhd::utils::computeTotalPressure function
+// =============================================================================
+/*!
+ * \brief Test the mhd::utils::computeTotalPressure function with the standard
+ * set of parameters.
+ *
+ */
+TEST(tMHDComputeTotalPressure, CorrectInputExpectCorrectOutput)
+{
+  TestParams parameters;
+  std::vector<double> fiducialTotalPressures{9.9999999999999995e-21, 806223.80964077567, 6.7079331637514151e+201};
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    Real testTotalPressure = mhd::utils::computeTotalPressure(parameters.pressureGas.at(i), parameters.magneticX.at(i),
+                                                              parameters.magneticY.at(i), parameters.magneticZ.at(i));
+
+    testing_utilities::Check_Results(fiducialTotalPressures.at(i), testTotalPressure, parameters.names.at(i));
+  }
+}
+
+/*!
+ * \brief Test the mhd::utils::computeTotalPressure function with a the standard
+ * set of parameters. Gas pressure has been multiplied and made negative to
+ * generate negative total pressures
+ *
+ */
+TEST(tMHDComputeTotalPressure, NegativePressureExpectAutomaticFix)
+{
+  TestParams parameters;
+  std::vector<double> pressureMultiplier{1.0, -1.0e4, -1.0e105};
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    Real testTotalPressure = mhd::utils::computeTotalPressure(pressureMultiplier.at(i) * parameters.pressureGas.at(i),
+                                                              parameters.magneticX.at(i), parameters.magneticY.at(i),
+                                                              parameters.magneticZ.at(i));
+
+    // I'm using the binary equality assertion here since in the case of
+    // negative pressure the function should return exactly TINY_NUMBER
+    EXPECT_EQ(TINY_NUMBER, testTotalPressure) << "Difference in " << parameters.names.at(i) << std::endl;
+  }
+}
+// =============================================================================
+// End of tests for the mhd::utils::computeTotalPressure function
+// =============================================================================
+
+// =============================================================================
+// Tests for the mhd::utils::fastMagnetosonicSpeed function
+// =============================================================================
+/*!
+ * \brief Test the mhd::utils::fastMagnetosonicSpeed function with the standard
+ * set of parameters. All values are reduced by 1e-25 in the large number case
+ * to avoid overflow
+ *
+ */
+TEST(tMHDFastMagnetosonicSpeed, CorrectInputExpectCorrectOutput)
+{
+  TestParams parameters;
+  std::vector<double> fiducialFastMagnetosonicSpeed{1.9254472601190615e-40, 98.062482309387562, 1.5634816865472293e+38};
+  std::vector<double> coef{1.0, 1.0, 1.0e-25};
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    Real testFastMagnetosonicSpeed = mhd::utils::fastMagnetosonicSpeed(
+        coef.at(i) * parameters.density.at(i), coef.at(i) * parameters.pressureGas.at(i),
+        coef.at(i) * parameters.magneticX.at(i), coef.at(i) * parameters.magneticY.at(i),
+        coef.at(i) * parameters.magneticZ.at(i), parameters.gamma);
+
+    testing_utilities::Check_Results(fiducialFastMagnetosonicSpeed.at(i), testFastMagnetosonicSpeed,
+                                     parameters.names.at(i));
+  }
+}
+
+/*!
+ * \brief Test the mhd::utils::fastMagnetosonicSpeed function with the standard
+ * set of parameters, density is negative. All values are reduced by 1e-25 in
+ * the large number case to avoid overflow.
+ *
+ */
+TEST(tMHDFastMagnetosonicSpeed, NegativeDensityExpectAutomaticFix)
+{
+  TestParams parameters;
+  std::vector<double> fiducialFastMagnetosonicSpeed{1.9254472601190615e-40, 12694062010603.15, 1.1582688085027081e+86};
+  std::vector<double> coef{1.0, 1.0, 1.0e-25};
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    Real testFastMagnetosonicSpeed = mhd::utils::fastMagnetosonicSpeed(
+        -coef.at(i) * parameters.density.at(i), coef.at(i) * parameters.pressureGas.at(i),
+        coef.at(i) * parameters.magneticX.at(i), coef.at(i) * parameters.magneticY.at(i),
+        coef.at(i) * parameters.magneticZ.at(i), parameters.gamma);
+
+    testing_utilities::Check_Results(fiducialFastMagnetosonicSpeed.at(i), testFastMagnetosonicSpeed,
+                                     parameters.names.at(i));
+  }
+}
+// =============================================================================
+// End of tests for the mhd::utils::fastMagnetosonicSpeed function
+// =============================================================================
+
+// =============================================================================
+// Tests for the mhd::utils::slowMagnetosonicSpeed function
+// =============================================================================
+/*!
+ * \brief Test the mhd::utils::slowMagnetosonicSpeed function with the standard
+ * set of parameters. All values are reduced by 1e-25 in the large number case
+ * to avoid overflow
+ *
+ */
+TEST(tMHDSlowMagnetosonicSpeed, CorrectInputExpectCorrectOutput)
+{
+  TestParams parameters;
+  std::vector<double> fiducialSlowMagnetosonicSpeed{0.0, 2.138424778167535, 0.26678309355540852};
+  // Coefficient to make sure the output is well defined and not nan or inf
+  double const coef = 1E-95;
+
+  for (size_t i = 2; i < parameters.names.size(); i++) {
+    Real testSlowMagnetosonicSpeed = mhd::utils::slowMagnetosonicSpeed(
+        parameters.density.at(i) * coef, parameters.pressureGas.at(i) * coef, parameters.magneticX.at(i) * coef,
+        parameters.magneticY.at(i) * coef, parameters.magneticZ.at(i) * coef, parameters.gamma);
+
+    testing_utilities::Check_Results(fiducialSlowMagnetosonicSpeed.at(i), testSlowMagnetosonicSpeed,
+                                     parameters.names.at(i));
+  }
+}
+
+/*!
+ * \brief Test the mhd::utils::slowMagnetosonicSpeed function with the standard
+ * set of parameters, density is negative. All values are reduced by 1e-25 in
+ * the large number case to avoid overflow.
+ *
+ */
+TEST(tMHDSlowMagnetosonicSpeed, NegativeDensityExpectAutomaticFix)
+{
+  TestParams parameters;
+  std::vector<double> fiducialSlowMagnetosonicSpeed{0.0, 276816332809.37604, 1976400098318.3574};
+  // Coefficient to make sure the output is well defined and not nan or inf
+  double const coef = 1E-95;
+
+  for (size_t i = 2; i < parameters.names.size(); i++) {
+    Real testSlowMagnetosonicSpeed = mhd::utils::slowMagnetosonicSpeed(
+        -parameters.density.at(i) * coef, parameters.pressureGas.at(i) * coef, parameters.magneticX.at(i) * coef,
+        parameters.magneticY.at(i) * coef, parameters.magneticZ.at(i) * coef, parameters.gamma);
+
+    testing_utilities::Check_Results(fiducialSlowMagnetosonicSpeed.at(i), testSlowMagnetosonicSpeed,
+                                     parameters.names.at(i));
+  }
+}
+// =============================================================================
+// End of tests for the mhd::utils::slowMagnetosonicSpeed function
+// =============================================================================
+
+// =============================================================================
+// Tests for the mhd::utils::alfvenSpeed function
+// =============================================================================
+/*!
+ * \brief Test the mhd::utils::alfvenSpeed function with the standard set of
+ * parameters.
+ *
+ */
+TEST(tMHDAlfvenSpeed, CorrectInputExpectCorrectOutput)
+{
+  TestParams parameters;
+  std::vector<double> fiducialAlfvenSpeed{2.8568843800999998e-90, 71.380245120271113, 9.2291462785524423e+49};
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    Real testAlfvenSpeed = mhd::utils::alfvenSpeed(parameters.magneticX.at(i), parameters.density.at(i));
+
+    testing_utilities::Check_Results(fiducialAlfvenSpeed.at(i), testAlfvenSpeed, parameters.names.at(i));
+  }
+}
+
+/*!
+ * \brief Test the mhd::utils::alfvenSpeed function with the standard set of
+ * parameters except density is negative
+ *
+ */
+TEST(tMHDAlfvenSpeed, NegativeDensityExpectAutomaticFix)
+{
+  TestParams parameters;
+  std::vector<double> fiducialAlfvenSpeed{2.8568843800999998e-90, 9240080778600, 2.1621115263999998e+110};
+
+  for (size_t i = 0; i < parameters.names.size(); i++) {
+    Real testAlfvenSpeed = mhd::utils::alfvenSpeed(parameters.magneticX.at(i), -parameters.density.at(i));
+
+    testing_utilities::Check_Results(fiducialAlfvenSpeed.at(i), testAlfvenSpeed, parameters.names.at(i));
+  }
+}
+// =============================================================================
+// End of tests for the mhd::utils::alfvenSpeed function
+// =============================================================================
+
+// =============================================================================
+// Tests for the mhd::utils::cellCenteredMagneticFields function
+// =============================================================================
+#ifdef MHD
+TEST(tMHDCellCenteredMagneticFields, CorrectInputExpectCorrectOutput)
+{
+  // Initialize the test grid and other state variables
+  size_t const nx = 3, ny = nx;
+  size_t const xid = std::floor(nx / 2), yid = xid, zid = xid;
+  size_t const id = xid + yid * nx + zid * nx * ny;
+
+  size_t const n_cells = std::pow(5, 3);
+  // Make sure the vector is large enough that the locations where the
+  // magnetic field would be in the real grid are filled
+  std::vector<double> testGrid(n_cells * (grid_enum::num_fields));
+  // Populate the grid with values where testGrid.at(i) = double(i). The
+  // values chosen aren't that important, just that every cell has a unique
+  // value
+  std::iota(std::begin(testGrid), std::end(testGrid), 0.);
+
+  // Fiducial and test variables
+  double const fiducialAvgBx = 637.5, fiducialAvgBy = 761.5, fiducialAvgBz = 883.5;
+
+  // Call the function to test
+  auto [testAvgBx, testAvgBy, testAvgBz] =
+      mhd::utils::cellCenteredMagneticFields(testGrid.data(), id, xid, yid, zid, n_cells, nx, ny);
+
+  // Check the results
+  testing_utilities::Check_Results(fiducialAvgBx, testAvgBx, "cell centered Bx value");
+  testing_utilities::Check_Results(fiducialAvgBy, testAvgBy, "cell centered By value");
+  testing_utilities::Check_Results(fiducialAvgBz, testAvgBz, "cell centered Bz value");
+}
+#endif  // MHD
+// =============================================================================
+// End of tests for the mhd::utils::cellCenteredMagneticFields function
+// =============================================================================
+
+// =============================================================================
+// Tests for the mhd::utils::Init_Magnetic_Field_With_Vector_Potential function
+// =============================================================================
+#ifdef MHD
+TEST(tMHDInitMagneticFieldWithVectorPotential, CorrectInputExpectCorrectOutput)
+{
+  // Mock up Header and Conserved structs
+  Header H;
+  Grid3D::Conserved C;
+
+  H.nx      = 2;
+  H.ny      = 2;
+  H.nz      = 2;
+  H.n_cells = H.nx * H.ny * H.nz;
+  H.dx      = 0.2;
+  H.dy      = 0.2;
+  H.dz      = 0.2;
+
+  double const default_fiducial = -999;
+  std::vector<double> conserved_vector(H.n_cells * grid_enum::num_fields, default_fiducial);
+  C.host       = conserved_vector.data();
+  C.density    = &(C.host[grid_enum::density * H.n_cells]);
+  C.momentum_x = &(C.host[grid_enum::momentum_x * H.n_cells]);
+  C.momentum_y = &(C.host[grid_enum::momentum_y * H.n_cells]);
+  C.momentum_z = &(C.host[grid_enum::momentum_z * H.n_cells]);
+  C.Energy     = &(C.host[grid_enum::Energy * H.n_cells]);
+  C.magnetic_x = &(C.host[grid_enum::magnetic_x * H.n_cells]);
+  C.magnetic_y = &(C.host[grid_enum::magnetic_y * H.n_cells]);
+  C.magnetic_z = &(C.host[grid_enum::magnetic_z * H.n_cells]);
+
+  // Mock up vector potential
+  std::vector<double> vector_potential(H.n_cells * 3, 0);
+  std::iota(vector_potential.begin(), vector_potential.end(), 0);
+
+  // Run the function
+  mhd::utils::Init_Magnetic_Field_With_Vector_Potential(H, C, vector_potential);
+
+  // Check the results
+  double const bx_fiducial = -10.0;
+  double const by_fiducial = 15.0;
+  double const bz_fiducial = -5.0;
+
+  for (size_t i = 0; i < conserved_vector.size(); i++) {
+    if (i == 47) {
+      testing_utilities::Check_Results(bx_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i));
+    } else if (i == 55) {
+      testing_utilities::Check_Results(by_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i));
+    } else if (i == 63) {
+      testing_utilities::Check_Results(bz_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i));
+    } else {
+      testing_utilities::Check_Results(default_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i));
+    }
+  }
+}
+#endif  // MHD
+// =============================================================================
+// End of tests for the mhd::utils::Init_Magnetic_Field_With_Vector_Potential function
+// =============================================================================
diff --git a/src/utils/parallel_omp.cpp b/src/utils/parallel_omp.cpp
index 90a70c914..1e633ef07 100644
--- a/src/utils/parallel_omp.cpp
+++ b/src/utils/parallel_omp.cpp
@@ -1,56 +1,58 @@
 #ifdef PARALLEL_OMP
 
-#include "../utils/parallel_omp.h"
-
-void Get_OMP_Grid_Indxs(  int n_grid_cells, int n_omp_procs, int omp_proc_id, int *omp_gridIndx_start, int *omp_gridIndx_end  ){
+  #include "../utils/parallel_omp.h"
 
+void Get_OMP_Grid_Indxs(int n_grid_cells, int n_omp_procs, int omp_proc_id, int *omp_gridIndx_start,
+                        int *omp_gridIndx_end)
+{
   int grid_reminder, n_grid_omp, g_start, g_end;
   grid_reminder = n_grid_cells % n_omp_procs;
-  n_grid_omp = n_grid_cells / n_omp_procs;
+  n_grid_omp    = n_grid_cells / n_omp_procs;
 
-  g_start = 0;
+  g_start     = 0;
   int counter = 0;
-  while ( counter < omp_proc_id ){
+  while (counter < omp_proc_id) {
     g_start += n_grid_omp;
-    if ( counter < grid_reminder )  g_start += 1;
+    if (counter < grid_reminder) {
+      g_start += 1;
+    }
     counter += 1;
   }
   g_end = g_start + n_grid_omp;
-  if ( omp_proc_id < grid_reminder )  g_end += 1;
+  if (omp_proc_id < grid_reminder) {
+    g_end += 1;
+  }
 
   *omp_gridIndx_start = g_start;
-  *omp_gridIndx_end = g_end;
-
+  *omp_gridIndx_end   = g_end;
 }
 
-#ifdef PARTICLES
-void Get_OMP_Particles_Indxs( part_int_t n_parts_local, int n_omp_procs, int omp_proc_id, part_int_t *omp_pIndx_start, part_int_t *omp_pIndx_end ){
-
+  #ifdef PARTICLES
+void Get_OMP_Particles_Indxs(part_int_t n_parts_local, int n_omp_procs, int omp_proc_id, part_int_t *omp_pIndx_start,
+                             part_int_t *omp_pIndx_end)
+{
   part_int_t n_parts_omp, parts_reminder, p_start, p_end;
 
   parts_reminder = n_parts_local % n_omp_procs;
-  n_parts_omp = n_parts_local / n_omp_procs;
+  n_parts_omp    = n_parts_local / n_omp_procs;
 
-  p_start = 0;
+  p_start     = 0;
   int counter = 0;
-  while ( counter < omp_proc_id ){
+  while (counter < omp_proc_id) {
     p_start += n_parts_omp;
-    if ( counter < parts_reminder ) p_start += 1;
+    if (counter < parts_reminder) {
+      p_start += 1;
+    }
     counter += 1;
   }
   p_end = p_start + n_parts_omp;
-  if ( omp_proc_id < parts_reminder ) p_end += 1;
+  if (omp_proc_id < parts_reminder) {
+    p_end += 1;
+  }
 
   *omp_pIndx_start = p_start;
-  *omp_pIndx_end = p_end;
-
+  *omp_pIndx_end   = p_end;
 }
-#endif
-
-
-
-
-
-
+  #endif
 
 #endif
diff --git a/src/utils/parallel_omp.h b/src/utils/parallel_omp.h
index b115dcb76..5e8f6cffa 100644
--- a/src/utils/parallel_omp.h
+++ b/src/utils/parallel_omp.h
@@ -1,20 +1,24 @@
 #ifdef PARALLEL_OMP
 
-#ifndef PARALLEL_OMP_H
-#define PARALLEL_OMP_H
+  #ifndef PARALLEL_OMP_H
+    #define PARALLEL_OMP_H
 
-#include <stdio.h>
-#include <stdlib.h>
-#include "math.h"
-#include "../global/global.h"
-#include <iostream>
-#include <omp.h>
+    #include <omp.h>
+    #include <stdio.h>
+    #include <stdlib.h>
 
-void Get_OMP_Grid_Indxs( int n_grid_cells, int n_omp_procs, int omp_proc_id, int *omp_gridIndx_start, int *omp_gridIndx_end );
+    #include <iostream>
 
-#ifdef PARTICLES
-void Get_OMP_Particles_Indxs( part_int_t n_parts_local, int n_omp_procs, int omp_proc_id, part_int_t *omp_pIndx_start, part_int_t *omp_pIndx_end );
-#endif
+    #include "../global/global.h"
+    #include "math.h"
 
-#endif
+void Get_OMP_Grid_Indxs(int n_grid_cells, int n_omp_procs, int omp_proc_id, int *omp_gridIndx_start,
+                        int *omp_gridIndx_end);
+
+    #ifdef PARTICLES
+void Get_OMP_Particles_Indxs(part_int_t n_parts_local, int n_omp_procs, int omp_proc_id, part_int_t *omp_pIndx_start,
+                             part_int_t *omp_pIndx_end);
+    #endif
+
+  #endif
 #endif
diff --git a/src/utils/prng_utilities.h b/src/utils/prng_utilities.h
index 47e628a77..4eacbb0f1 100644
--- a/src/utils/prng_utilities.h
+++ b/src/utils/prng_utilities.h
@@ -1,39 +1,39 @@
 // STL Includes
-#include <random>
 #include <chrono>
+#include <random>
 #include <string>
 
 // Local includes
 #include "../global/global.h"
 
-
 #pragma once
 
 class ChollaPrngGenerator
 {
-public:
-    std::mt19937_64 inline static generator;
+ public:
+  std::mt19937_64 inline static generator;
 
-    ChollaPrngGenerator(struct parameters *P)
-    {
-        // If the seed isn't defined in the settings file or argv then generate
-        // a random seed
-        if (P->prng_seed == 0)
-        {
-            // Since std::random_device isn't guaranteed to be random or
-            // different for each rank we're going to convert both the base seed
-            // and MPI rank to strings, concatenated them, then hash the result.
-            // This should give a fairly random seed even if std::random_device
-            // isn't random
-            std::string hashString = std::to_string(std::random_device{}())
-                                    + std::to_string(std::chrono::high_resolution_clock::now().time_since_epoch().count())
-                                    + std::to_string(static_cast<std::uint_fast64_t>(procID));
-            std::size_t hashedSeed = std::hash<std::string>{}(hashString);
-            P->prng_seed = static_cast<std::uint_fast64_t>(hashedSeed);
-        }
+  ChollaPrngGenerator(struct Parameters *P)
+  {
+    // If the seed isn't defined in the settings file or argv then generate
+    // a random seed
+    if (P->prng_seed == 0) {
+      // Since std::random_device isn't guaranteed to be random or
+      // different for each rank we're going to convert both the base seed
+      // and MPI rank to strings, concatenated them, then hash the result.
+      // This should give a fairly random seed even if std::random_device
+      // isn't random
+      std::string hashString = std::to_string(std::random_device{}())
+#ifdef MPI_CHOLLA
+                               + std::to_string(static_cast<std::uint_fast64_t>(procID))
+#endif
+                               + std::to_string(std::chrono::high_resolution_clock::now().time_since_epoch().count());
+      std::size_t hashedSeed = std::hash<std::string>{}(hashString);
+      P->prng_seed           = static_cast<std::uint_fast64_t>(hashedSeed);
+    }
 
-        // Initialize the PRNG
-        generator.seed(P->prng_seed);
-    };
-    ~ChollaPrngGenerator() = default;
+    // Initialize the PRNG
+    generator.seed(P->prng_seed);
+  };
+  ~ChollaPrngGenerator() = default;
 };
diff --git a/src/utils/ran.h b/src/utils/ran.h
deleted file mode 100644
index 09a0b8868..000000000
--- a/src/utils/ran.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#include <stdlib.h>
-#include <math.h>
-
-typedef unsigned long long int Ullong;
-typedef double Doub;
-typedef unsigned int Uint;
-
-struct Ran {
-
-  Ullong u,v,w;
-  Ran(Ullong j) : v(4101842887655102017LL), w(1) {
-    u = j^v; int64();
-    v = u; int64();
-    w = v; int64();
-  }
-  inline Ullong int64() {
-    u = u * 2862933555777941757LL + 7046029254386353087LL;
-    v ^= v >> 17; v ^= v << 31; v ^= v >> 8;
-    w = 4294957665U*(w & 0xffffffff) + (w >> 32);
-    Ullong x = u ^ (u << 21); x ^= x >> 35; x ^= x << 4;
-    return (x + v) ^ w;
-  }
-  inline Doub doub() { return 5.42101086242752217E-20 * int64(); }
-  inline Uint int32() { return (Uint)int64(); }
-
-};
diff --git a/src/utils/reduction_utilities.cu b/src/utils/reduction_utilities.cu
index 820f27826..6434f560b 100644
--- a/src/utils/reduction_utilities.cu
+++ b/src/utils/reduction_utilities.cu
@@ -13,48 +13,29 @@
 // Local Includes
 #include "../utils/reduction_utilities.h"
 
-#ifdef CUDA
-    namespace reduction_utilities
-    {
-        // =====================================================================
-        __global__ void kernelReduceMax(Real *in, Real* out, size_t N)
-        {
-            // Initialize maxVal to the smallest possible number
-            Real maxVal = -DBL_MAX;
-
-            // Grid stride loop to perform as much of the reduction as possible
-            for(size_t i = blockIdx.x * blockDim.x + threadIdx.x;
-                i < N;
-                 i += blockDim.x * gridDim.x)
-            {
-                // A transformation could go here
-
-                // Grid stride reduction
-                maxVal = max(maxVal,in[i]);
-            }
-
-            // Find the maximum val in the grid and write it to `out`. Note that
-            // there is no execution/memory barrier after this and so the
-            // reduced scalar is not available for use in this kernel. The grid
-            // wide barrier can be accomplished by ending this kernel here and
-            // then launching a new one or by using cooperative groups. If this
-            // becomes a need it can be added later
-            gridReduceMax(maxVal, out);
-        }
-        // =====================================================================
-
-        // =====================================================================
-        void reductionLaunchParams(uint &numBlocks, uint &threadsPerBlock, uint const &deviceNum)
-        {
-            cudaDeviceProp prop;
-            cudaGetDeviceProperties(&prop, deviceNum);
-
-            // Divide the total number of allowed threads by the number of
-            // threads per block
-            threadsPerBlock =  prop.maxThreadsPerBlock;
-            numBlocks       = (prop.maxThreadsPerMultiProcessor * prop.multiProcessorCount)
-                              / threadsPerBlock;
-        }
-        // =====================================================================
-    }//reduction_utilities
-#endif  //CUDA
\ No newline at end of file
+namespace reduction_utilities
+{
+// =====================================================================
+__global__ void kernelReduceMax(Real* in, Real* out, size_t N)
+{
+  // Initialize maxVal to the smallest possible number
+  Real maxVal = -DBL_MAX;
+
+  // Grid stride loop to perform as much of the reduction as possible
+  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < N; i += blockDim.x * gridDim.x) {
+    // A transformation could go here
+
+    // Grid stride reduction
+    maxVal = max(maxVal, in[i]);
+  }
+
+  // Find the maximum val in the grid and write it to `out`. Note that
+  // there is no execution/memory barrier after this and so the
+  // reduced scalar is not available for use in this kernel. The grid
+  // wide barrier can be accomplished by ending this kernel here and
+  // then launching a new one or by using cooperative groups. If this
+  // becomes a need it can be added later
+  gridReduceMax(maxVal, out);
+}
+// =====================================================================
+}  // namespace reduction_utilities
diff --git a/src/utils/reduction_utilities.h b/src/utils/reduction_utilities.h
index 6935d481b..99191d8c5 100644
--- a/src/utils/reduction_utilities.h
+++ b/src/utils/reduction_utilities.h
@@ -8,7 +8,7 @@
 #pragma once
 
 // STL Includes
-#include <float.h>
+#include <cstdint>
 
 // External Includes
 
@@ -17,177 +17,290 @@
 #include "../global/global_cuda.h"
 #include "../utils/gpu.hpp"
 
-#ifdef CUDA
-    /*!
-    * \brief Namespace to contain device resident reduction functions. Includes
-    * functions and kernels for array reduction, warp level, block level, and
-    * grid level reductions.
-    *
-    */
-    namespace reduction_utilities
-    {
-        // =====================================================================
-        /*!
-        * \brief Perform a reduction within the warp/wavefront to find the
-        * maximum value of `val`
-        *
-        * \param[in] val The thread local variable to find the maximum of across
-        * the warp
-        * \return Real The maximum value of `val` within the warp
-        */
-        __inline__ __device__ Real warpReduceMax(Real val)
-        {
-            for (int offset = warpSize/2; offset > 0; offset /= 2)
-            {
-                val = max(val, __shfl_down(val, offset));
-            }
-            return val;
-        }
-        // =====================================================================
-
-        // =====================================================================
-        /*!
-        * \brief Perform a reduction within the block to find the maximum value
-        * of `val`
-        *
-        * \param[in] val The thread local variable to find the maximum of across
-        * the block
-        * \return Real The maximum value of `val` within the block
-        */
-        __inline__ __device__ Real blockReduceMax(Real val)
-        {
-            // Shared memory for storing the results of each warp-wise partial
-            // reduction
-            __shared__ Real shared[::maxWarpsPerBlock];
-
-            int lane   = threadIdx.x % warpSize;  // thread ID within the warp,
-            int warpId = threadIdx.x / warpSize;  // ID of the warp itself
-
-            val = warpReduceMax(val);     // Each warp performs partial reduction
-
-            if (lane==0) shared[warpId]=val; // Write reduced value to shared memory
-
-            __syncthreads();              // Wait for all partial reductions
-
-            //read from shared memory only if that warp existed
-            val = (threadIdx.x < blockDim.x / warpSize) ? shared[lane] : 0;
-
-            if (warpId==0) val = warpReduceMax(val); //Final reduce within first warp
-
-            return val;
-        }
-        // =====================================================================
-
-        // =====================================================================
-        /*!
-        * \brief Perform an atomic reduction to find the maximum value of `val`
-        *
-        * \param[out] address The pointer to where to store the reduced scalar
-        * value in device memory
-        * \param[in] val The thread local variable to find the maximum of across
-        * the grid. Typically this should be a partial reduction that has
-        * already been reduced to the block level
-        */
-        __inline__ __device__ double atomicMax_double(double* address, double val)
-        {
-            unsigned long long int* address_as_ull = (unsigned long long int*) address;
-            unsigned long long int old = *address_as_ull, assumed;
-            // Explanation of loop here:
-            // https://stackoverflow.com/questions/16077464/atomicadd-for-double-on-gpu
-            // The loop is to make sure the value at address doesn't change
-            // between the load at the atomic since the entire operation isn't
-            // atomic
-
-            // While it appears that this could result in many times more atomic
-            // operations than required, in practice it's only a handful of
-            // extra operation even in the worst case. Running with 16,000
-            // blocks gives ~8-37 atomics after brief testing
-            do {
-                assumed = old;
-                old = atomicCAS(address_as_ull,
-                                assumed,
-                                __double_as_longlong(fmax(__longlong_as_double(assumed),val)));
-            } while (assumed != old);
-            return __longlong_as_double(old);
-        }
-        // =====================================================================
-
-        // =====================================================================
-        /*!
-         * \brief Perform a reduction within the grid to find the maximum value
-         * of `val`. Note that the value of `out` should be set appropriately
-         * before the kernel launch that uses this function to avoid any
-         * potential race condition; the `cuda_utilities::setScalarDeviceMemory`
-         * function exists for this purpose.
-         *
-         * \details This function can perform a reduction to find the maximum of
-         * the thread local variable `val` across the entire grid. It relies on a
-         * warp-wise reduction using registers followed by a block-wise reduction
-         * using shared memory, and finally a grid-wise reduction using atomics.
-         * As a result the performance of this function is substantally improved
-         * by using as many threads per block as possible and as few blocks as
-         * possible since each block has to perform an atomic operation. To
-         * accomplish this it is reccommened that you use the
-         * `reductionLaunchParams` functions to get the optimal number of blocks
-         * and threads per block to launch rather than relying on Cholla defaults
-         * and then within the kernel using a grid-stride loop to make sure the
-         * kernel works with any combination of threads and blocks. Note that
-         * after this function call you cannot use the reduced value in global
-         * memory since there is no grid wide sync. You can get around this by
-         * either launching a second kernel to do the next steps or by using
-         * cooperative groups to perform a grid wide sync. During it's execution
-         * it also calls multiple __synchThreads and so cannot be called from
-         * within any kind of thread guard.
-         *
-         * \param[in] val The thread local variable to find the maximum of across
-         * the grid
-         * \param[out] out The pointer to where to store the reduced scalar value
-         * in device memory
-         */
-        __inline__ __device__ void gridReduceMax(Real val, Real* out)
-        {
-            // __syncthreads();  // Wait for all threads to calculate val;
-
-            // Reduce the entire block in parallel
-            val = blockReduceMax(val);
-
-            // Write block level reduced value to the output scalar atomically
-            if (threadIdx.x == 0) atomicMax_double(out, val);
-        }
-        // =====================================================================
-
-        // =====================================================================
-        /*!
-         * \brief Find the maximum value in the array. Make sure to initialize
-         * `out` correctly before using this kernel; the
-         * `cuda_utilities::setScalarDeviceMemory` function exists for this
-         * purpose. If `in` and `out` are the same array that's ok, all the
-         * loads are completed before the overwrite occurs.
-         *
-         * \param[in] in The pointer to the array to reduce in device memory
-         * \param[out] out The pointer to where to store the reduced scalar
-         * value in device memory
-         * \param[in] N The size of the `in` array
-         */
-        __global__ void kernelReduceMax(Real *in, Real* out, size_t N);
-        // =====================================================================
-
-        // =====================================================================
-        /*!
-        * \brief Determine the optimal number of blocks and threads per block to
-        * use when launching a reduction kernel
-        *
-        * \param[out] numBlocks The maximum number of blocks that are
-        * scheduleable by the device in use when each block has the maximum
-        * number of threads
-        * \param[out] threadsPerBlock The maximum threads per block supported by
-        * the device in use
-        * \param[in] deviceNum optional: which device is being targeted.
-        * Defaults to zero
-        */
-        void reductionLaunchParams(uint &numBlocks,
-                                   uint &threadsPerBlock,
-                                   uint const &deviceNum=0);
-        // =====================================================================
-    }  // namespace reduction_utilities
-#endif  //CUDA
+/*!
+ * \brief Namespace to contain device resident reduction functions. Includes
+ * functions and kernels for array reduction, warp level, block level, and
+ * grid level reductions.
+ *
+ */
+namespace reduction_utilities
+{
+// =====================================================================
+/*!
+ * \brief Perform a reduction within the warp/wavefront to find the
+ * maximum value of `val`
+ *
+ * \param[in] val The thread local variable to find the maximum of across
+ * the warp
+ * \return Real The maximum value of `val` within the warp
+ */
+__inline__ __device__ Real warpReduceMax(Real val)
+{
+  for (int offset = warpSize / 2; offset > 0; offset /= 2) {
+    val = max(val, __shfl_down(val, offset));
+  }
+  return val;
+}
+// =====================================================================
+
+// =====================================================================
+/*!
+ * \brief Perform a reduction within the block to find the maximum value
+ * of `val`
+ *
+ * \param[in] val The thread local variable to find the maximum of across
+ * the block
+ * \return Real The maximum value of `val` within the block
+ */
+__inline__ __device__ Real blockReduceMax(Real val)
+{
+  // Shared memory for storing the results of each warp-wise partial
+  // reduction
+  __shared__ Real shared[::maxWarpsPerBlock];
+
+  int lane   = threadIdx.x % warpSize;  // thread ID within the warp,
+  int warpId = threadIdx.x / warpSize;  // ID of the warp itself
+
+  val = warpReduceMax(val);  // Each warp performs partial reduction
+
+  if (lane == 0) {
+    shared[warpId] = val;
+  }  // Write reduced value to shared memory
+
+  __syncthreads();  // Wait for all partial reductions
+
+  // read from shared memory only if that warp existed
+  val = (threadIdx.x < blockDim.x / warpSize) ? shared[lane] : 0;
+
+  if (warpId == 0) {
+    val = warpReduceMax(val);
+  }  // Final reduce within first warp
+
+  return val;
+}
+// =====================================================================
+
+#ifndef O_HIP
+// =====================================================================
+// This section handles the atomics. It is complicated because CUDA
+// doesn't currently support atomics with non-integral types.
+// This code is taken from
+// https://github.com/rapidsai/cuml/blob/dc14361ba11c41f7a4e1e6a3625bbadd0f52daf7/cpp/src_prims/stats/minmax.cuh
+// with slight tweaks for our use case.
+// =====================================================================
+/*!
+ * \brief Do a device side bit cast
+ *
+ * \tparam To The output type
+ * \tparam From The input type
+ * \param from The input value
+ * \return To The bit cast version of From as type To
+ */
+template <class To, class From>
+__device__ constexpr To bit_cast(const From& from) noexcept
+{
+  // TODO: replace with `std::bitcast` once we adopt C++20 or libcu++ adds it
+  To to{};
+  static_assert(sizeof(To) == sizeof(From));
+  memcpy(&to, &from, sizeof(To));
+  return to;
+}
+
+/*!
+ * \brief Encode a float as an int
+ *
+ * \param val The float to encode
+ * \return int The encoded int
+ */
+inline __device__ int encode(float val)
+{
+  int i = bit_cast<int>(val);
+  return i >= 0 ? i : (1 << 31) | ~i;  // NOLINT(hicpp-signed-bitwise)
+}
+
+/*!
+ * \brief Encode a double as a long long int
+ *
+ * \param val The double to encode
+ * \return long long The encoded long long int
+ */
+inline __device__ long long encode(double val)
+{
+  auto i = bit_cast<std::int64_t>(val);
+  return i >= 0 ? i : (1ULL << 63) | ~i;  // NOLINT(hicpp-signed-bitwise)
+}
+
+/*!
+ * \brief Decodes an int as a float
+ *
+ * \param val The int to decode
+ * \return float The decoded float
+ */
+inline __device__ float decode(int val)
+{
+  if (val < 0) {
+    val = (1 << 31) | ~val;  // NOLINT(hicpp-signed-bitwise)
+  }
+  return bit_cast<float>(val);
+}
+
+/*!
+ * \brief Decodes a long long int as a double
+ *
+ * \param val The long long to decode
+ * \return double The decoded double
+ */
+inline __device__ double decode(long long val)
+{
+  if (val < 0) {
+    val = (1ULL << 63) | ~val;  // NOLINT(hicpp-signed-bitwise)
+  }
+  return bit_cast<double>(val);
+}
+#endif  // O_HIP
+/*!
+ * \brief Perform an atomic reduction to find the maximum value of `val`
+ *
+ * \param[out] address The pointer to where to store the reduced scalar
+ * value in device memory
+ * \param[in] val The thread local variable to find the maximum of across
+ * the grid. Typically this should be a partial reduction that has
+ * already been reduced to the block level
+ */
+inline __device__ float atomicMaxBits(float* address, float val)
+{
+#ifdef O_HIP
+  return atomicMax(address, val);
+#else   // O_HIP
+  int old = atomicMax((int*)address, encode(val));
+  return decode(old);
+#endif  // O_HIP
+}
+
+/*!
+ * \brief Perform an atomic reduction to find the maximum value of `val`
+ *
+ * \param[out] address The pointer to where to store the reduced scalar
+ * value in device memory
+ * \param[in] val The thread local variable to find the maximum of across
+ * the grid. Typically this should be a partial reduction that has
+ * already been reduced to the block level
+ */
+inline __device__ double atomicMaxBits(double* address, double val)
+{
+#ifdef O_HIP
+  return atomicMax(address, val);
+#else   // O_HIP
+  long long old = atomicMax((long long*)address, encode(val));
+  return decode(old);
+#endif  // O_HIP
+}
+
+/*!
+ * \brief Perform an atomic reduction to find the minimum value of `val`
+ *
+ * \param[out] address The pointer to where to store the reduced scalar
+ * value in device memory
+ * \param[in] val The thread local variable to find the minimum of across
+ * the grid. Typically this should be a partial reduction that has
+ * already been reduced to the block level
+ */
+inline __device__ float atomicMinBits(float* address, float val)
+{
+#ifdef O_HIP
+  return atomicMin(address, val);
+#else   // O_HIP
+  int old = atomicMin((int*)address, encode(val));
+  return decode(old);
+#endif  // O_HIP
+}
+
+/*!
+ * \brief Perform an atomic reduction to find the minimum value of `val`
+ *
+ * \param[out] address The pointer to where to store the reduced scalar
+ * value in device memory
+ * \param[in] val The thread local variable to find the minimum of across
+ * the grid. Typically this should be a partial reduction that has
+ * already been reduced to the block level
+ */
+inline __device__ double atomicMinBits(double* address, double val)
+{
+#ifdef O_HIP
+  return atomicMin(address, val);
+#else   // O_HIP
+  long long old = atomicMin((long long*)address, encode(val));
+  return decode(old);
+#endif  // O_HIP
+}
+// =====================================================================
+
+// =====================================================================
+/*!
+ * \brief Perform a reduction within the grid to find the maximum value
+ * of `val`. Note that the value of `out` should be set appropriately
+ * before the kernel launch that uses this function to avoid any
+ * potential race condition; the `cuda_utilities::setScalarDeviceMemory`
+ * function exists for this purpose.
+ * of `val`. Note that the value of `out` should be set appropriately
+ * before the kernel launch that uses this function to avoid any
+ * potential race condition; the `cuda_utilities::setScalarDeviceMemory`
+ * function exists for this purpose.
+ *
+ * \details This function can perform a reduction to find the maximum of
+ * the thread local variable `val` across the entire grid. It relies on a
+ * warp-wise reduction using registers followed by a block-wise reduction
+ * using shared memory, and finally a grid-wise reduction using atomics.
+ * As a result the performance of this function is substantally improved
+ * by using as many threads per block as possible and as few blocks as
+ * possible since each block has to perform an atomic operation. To
+ * accomplish this it is reccommened that you use the
+ * `AutomaticLaunchParams` functions to get the optimal number of blocks
+ * and threads per block to launch rather than relying on Cholla defaults
+ * and then within the kernel using a grid-stride loop to make sure the
+ * kernel works with any combination of threads and blocks. Note that
+ * after this function call you cannot use the reduced value in global
+ * memory since there is no grid wide sync. You can get around this by
+ * either launching a second kernel to do the next steps or by using
+ * cooperative groups to perform a grid wide sync. During it's execution
+ * it also calls multiple __synchThreads and so cannot be called from
+ * within any kind of thread guard.
+ *
+ * \param[in] val The thread local variable to find the maximum of across
+ * the grid
+ * \param[out] out The pointer to where to store the reduced scalar value
+ * in device memory
+ */
+__inline__ __device__ void gridReduceMax(Real val, Real* out)
+{
+  // Reduce the entire block in parallel
+  val = blockReduceMax(val);
+
+  // Write block level reduced value to the output scalar atomically
+  if (threadIdx.x == 0) {
+    atomicMaxBits(out, val);
+  }
+}
+// =====================================================================
+
+// =====================================================================
+/*!
+ * \brief Find the maximum value in the array. Make sure to initialize
+ * `out` correctly before using this kernel; the
+ * `cuda_utilities::setScalarDeviceMemory` function exists for this
+ * purpose. If `in` and `out` are the same array that's ok, all the
+ * loads are completed before the overwrite occurs.
+ * \brief Find the maximum value in the array. Make sure to initialize
+ * `out` correctly before using this kernel; the
+ * `cuda_utilities::setScalarDeviceMemory` function exists for this
+ * purpose. If `in` and `out` are the same array that's ok, all the
+ * loads are completed before the overwrite occurs.
+ *
+ * \param[in] in The pointer to the array to reduce in device memory
+ * \param[out] out The pointer to where to store the reduced scalar
+ * value in device memory
+ * \param[in] N The size of the `in` array
+ */
+__global__ void kernelReduceMax(Real* in, Real* out, size_t N);
+// =====================================================================
+}  // namespace reduction_utilities
diff --git a/src/utils/reduction_utilities_tests.cu b/src/utils/reduction_utilities_tests.cu
index 2314b33be..5dd18c197 100644
--- a/src/utils/reduction_utilities_tests.cu
+++ b/src/utils/reduction_utilities_tests.cu
@@ -1,95 +1,69 @@
 /*!
  * \file reduction_utilities_tests.cpp
  * \author Robert 'Bob' Caddy (rvc@pitt.edu)
- * \brief Tests for the contents of reduction_utilities.h and reduction_utilities.cpp
+ * \brief Tests for the contents of reduction_utilities.h and
+ * reduction_utilities.cpp
  *
  */
 
 // STL Includes
-#include <vector>
-#include <string>
 #include <iostream>
 #include <random>
+#include <string>
+#include <vector>
 
 // External Includes
-#include <gtest/gtest.h>    // Include GoogleTest and related libraries/headers
+#include <gtest/gtest.h>  // Include GoogleTest and related libraries/headers
 
 // Local Includes
-#include "../utils/testing_utilities.h"
-#include "../utils/reduction_utilities.h"
 #include "../global/global.h"
-
-
+#include "../utils/DeviceVector.h"
+#include "../utils/cuda_utilities.h"
+#include "../utils/reduction_utilities.h"
+#include "../utils/testing_utilities.h"
 
 // =============================================================================
 // Tests for divergence max reduction
 // =============================================================================
 TEST(tALLKernelReduceMax, CorrectInputExpectCorrectOutput)
 {
-    // Launch parameters
-    // =================
-    uint numBlocks, threadsPerBlock;
-    reduction_utilities::reductionLaunchParams(numBlocks, threadsPerBlock);
-
-    // Grid Parameters & testing parameters
-    // ====================================
-    size_t const gridSize = 64;
-    size_t const size     = std::pow(gridSize, 3);;
-    Real   const maxValue = 4;
-    std::vector<Real> host_grid(size);
-    Real host_max = -DBL_MAX;
-
-    // Fill grid with random values and assign maximum value
-    std::mt19937 prng(1);
-    std::uniform_real_distribution<double> doubleRand(-std::abs(maxValue)-1, std::abs(maxValue) - 1);
-    std::uniform_int_distribution<int> intRand(0, host_grid.size()-1);
-    for (size_t i = 0; i < host_grid.size(); i++)
-    {
-        host_grid.at(i) = doubleRand(prng);
-    }
-    host_grid.at(intRand(prng)) = maxValue;
-
-
-    // Allocating and copying to device
-    // ================================
-    Real *dev_grid;
-    CudaSafeCall(cudaMalloc(&dev_grid, host_grid.size() * sizeof(Real)));
-    CudaSafeCall(cudaMemcpy(dev_grid, host_grid.data(), host_grid.size() * sizeof(Real), cudaMemcpyHostToDevice));
-
-    Real *dev_max_array;
-    CudaSafeCall(cudaMalloc(&dev_max_array, numBlocks*sizeof(Real)));
-    // Sets all bytes to 0.
-    cudaMemset(dev_max_array,0,numBlocks*sizeof(Real));
-    
-    Real host_max_array[numBlocks];
-    //Real *host_max_array = (Real *) malloc(numBlocks*sizeof(Real));
-    //CudaSafeCall( cudaHostAlloc(&host_max_array, numBlocks*sizeof(Real), cudaHostAllocDefault) );
-
-
-    // Do the reduction
-    // ================
-    hipLaunchKernelGGL(reduction_utilities::kernelReduceMax, numBlocks, threadsPerBlock, 0, 0, dev_grid, dev_max_array, host_grid.size());
-    CudaCheckError();
-
-
-    // Copy back and sync
-    // ==================
-    CudaSafeCall(cudaMemcpy(&host_max_array, dev_max_array, numBlocks*sizeof(Real), cudaMemcpyDeviceToHost));
-    cudaDeviceSynchronize();
-
-    for (int i = 0; i < numBlocks; i++)
-    {
-        host_max = fmax(host_max,host_max_array[i]);
-    }
-
-    //free(host_max_array);
-
-    cudaFree(dev_max_array);
-
-    cudaFree(dev_grid);
-
-    // Perform comparison
-    testingUtilities::checkResults(maxValue, host_max, "maximum value found");
+  // Launch parameters
+  // =================
+  cuda_utilities::AutomaticLaunchParams static const launchParams(reduction_utilities::kernelReduceMax);
+
+  // Grid Parameters & testing parameters
+  // ====================================
+  size_t const gridSize = 64;
+  size_t const size     = std::pow(gridSize, 3);
+  ;
+  Real const maxValue = 4;
+  std::vector<Real> host_grid(size);
+
+  // Fill grid with random values and assign maximum value
+  std::mt19937 prng(1);
+  std::uniform_real_distribution<double> doubleRand(-std::abs(maxValue) - 1, std::abs(maxValue) - 1);
+  std::uniform_int_distribution<int> intRand(0, host_grid.size() - 1);
+  for (Real& host_data : host_grid) {
+    host_data = doubleRand(prng);
+  }
+  host_grid.at(intRand(prng)) = maxValue;
+
+  // Allocating and copying to device
+  // ================================
+  cuda_utilities::DeviceVector<Real> dev_grid(host_grid.size());
+  dev_grid.cpyHostToDevice(host_grid);
+
+  cuda_utilities::DeviceVector<Real> static dev_max(1);
+  dev_max.assign(std::numeric_limits<double>::lowest());
+
+  // Do the reduction
+  // ================
+  hipLaunchKernelGGL(reduction_utilities::kernelReduceMax, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0,
+                     dev_grid.data(), dev_max.data(), host_grid.size());
+  GPU_Error_Check();
+
+  // Perform comparison
+  testing_utilities::Check_Results(maxValue, dev_max.at(0), "maximum value found");
 }
 // =============================================================================
 // Tests for divergence max reduction
diff --git a/src/utils/testing_utilities.cpp b/src/utils/testing_utilities.cpp
index 9b8bee948..7b1055ecd 100644
--- a/src/utils/testing_utilities.cpp
+++ b/src/utils/testing_utilities.cpp
@@ -6,180 +6,120 @@
  */
 
 // STL includes
-#include <limits>
 #include <cmath>
 #include <cstring>
 #include <iostream>
+#include <limits>
 
 // External Includes
-#include <gtest/gtest.h>    // Include GoogleTest and related libraries/headers
+#include <gtest/gtest.h>  // Include GoogleTest and related libraries/headers
 
 // Local includes
-#include "../utils/testing_utilities.h" // Include the header file
-#include "../system_tests/system_tester.h" // provide systemTest class
+#include "../system_tests/system_tester.h"  // provide systemTest class
+#include "../utils/testing_utilities.h"     // Include the header file
 
-namespace testingUtilities
+namespace testing_utilities
 {
-    // =========================================================================
-    int64_t ulpsDistanceDbl(double const &a, double const &b)
-    {
-        // Save work if the floats are equal.
-        // Also handles +0 == -0
-        if (a == b) return 0;
-
-        const auto maxInt = std::numeric_limits<int64_t>::max();
-
-        // If either one is NaN then they are not equal, max distance.
-        if (std::isnan(a) || std::isnan(b)) return maxInt;
+// =========================================================================
+int64_t ulpsDistanceDbl(double const &a, double const &b)
+{
+  // Save work if the floats are equal.
+  // Also handles +0 == -0
+  if (a == b) {
+    return 0;
+  }
 
-        // If one's infinite and they're not equal, max distance.
-        if (std::isinf(a) || std::isinf(b)) return maxInt;
+  const auto maxInt = std::numeric_limits<int64_t>::max();
 
-        int64_t ia, ib;
-        std::memcpy(&ia, &a, sizeof(double));
-        std::memcpy(&ib, &b, sizeof(double));
+  // If either one is NaN then they are not equal, max distance.
+  if (std::isnan(a) || std::isnan(b)) {
+    return maxInt;
+  }
 
-        // Don't compare differently-signed floats.
-        if ((ia < 0) != (ib < 0)) return maxInt;
+  // If one's infinite and they're not equal, max distance.
+  if (std::isinf(a) || std::isinf(b)) {
+    return maxInt;
+  }
 
-        // Return the absolute value of the distance in ULPs.
-        int64_t distance = ia - ib;
-        if (distance < 0) distance = -distance;
+  int64_t ia, ib;
+  std::memcpy(&ia, &a, sizeof(double));
+  std::memcpy(&ib, &b, sizeof(double));
 
-        return distance;
-    }
-    // =========================================================================
-
-    // =========================================================================
-    bool nearlyEqualDbl(double  const &a,
-                        double  const &b,
-                        double  &absoluteDiff,
-                        int64_t &ulpsDiff,
-                        double  const &fixedEpsilon, // = 1E-14 by default
-                        int     const &ulpsEpsilon)  // = 4 by default
-    {
-        // Compute differences
-        ulpsDiff = ulpsDistanceDbl(a, b);
-        absoluteDiff = std::abs(a - b);
-
-        // Perform the ULP check which is for numbers far from zero
-        if (ulpsDiff <= ulpsEpsilon)
-        {
-            return true;
-        }
-        // Perform the absolute check which is for numbers near zero
-        else if (absoluteDiff <= fixedEpsilon)
-        {
-            return true;
-        }
-        // if none of the checks have passed indicate test failure
-        else
-        {
-            return false;
-        }
-    }
-    // =========================================================================
-
-    // =========================================================================
-    void checkResults(double fiducialNumber,
-                      double testNumber,
-                      std::string outString,
-                      double fixedEpsilon,
-                      int ulpsEpsilon)
-    {
-        // Check for equality and if not equal return difference
-        double absoluteDiff;
-        int64_t ulpsDiff;
-        bool areEqual;
-
-        if ((fixedEpsilon < 0) and (ulpsEpsilon < 0))
-        {
-            areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber,
-                                                        testNumber,
-                                                        absoluteDiff,
-                                                        ulpsDiff);
-        }
-        else if ((fixedEpsilon > 0) and (ulpsEpsilon < 0))
-        {
-            areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber,
-                                                        testNumber,
-                                                        absoluteDiff,
-                                                        ulpsDiff,
-                                                        fixedEpsilon);
-        }
-        else
-        {
-            areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber,
-                                                        testNumber,
-                                                        absoluteDiff,
-                                                        ulpsDiff,
-                                                        fixedEpsilon,
-                                                        ulpsEpsilon);
-        }
-
-        EXPECT_TRUE(areEqual)
-            << "Difference in "                << outString       << std::endl
-            << "The fiducial value is:       " << fiducialNumber  << std::endl
-            << "The test value is:           " << testNumber      << std::endl
-            << "The absolute difference is:  " << absoluteDiff    << std::endl
-            << "The ULP difference is:       " << ulpsDiff        << std::endl;
-    }
-    // =========================================================================
-
-  void wrapperEqual(int i, int j, int k, std::string dataSetName,
-		    double test_value, double fid_value, double fixedEpsilon=5.0E-12) {
-
-    std::string outString;
-    outString += dataSetName;
-    outString += " dataset at [";
-    outString += i;
-    outString += ",";
-    outString += j;
-    outString += ",";
-    outString += k;
-    outString += "]";
-
-    checkResults(fid_value,test_value,outString,fixedEpsilon);
+  // Don't compare differently-signed floats.
+  if ((ia < 0) != (ib < 0)) {
+    return maxInt;
   }
 
-  void analyticConstant(systemTest::SystemTestRunner testObject, std::string dataSetName, double value) {
-    std::vector<size_t> testDims(3,1);
-    std::vector<double> testData = testObject.loadTestFieldData(dataSetName,testDims);
-    for (size_t i = 0; i < testDims[0]; i++)
-      {
-	for (size_t j = 0; j < testDims[1]; j++)
-	  {
-	    for (size_t k = 0; k < testDims[2]; k++)
-	      {
-		size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k;
-
-		wrapperEqual(i,j,k,dataSetName,testData.at(index),value);
-	      }
-	  }
-      }
+  // Return the absolute value of the distance in ULPs.
+  int64_t distance = ia - ib;
+  if (distance < 0) {
+    distance = -distance;
   }
 
-  void analyticSine(systemTest::SystemTestRunner testObject, std::string dataSetName,
-		    double constant, double amplitude,
-		    double kx, double ky, double kz, double phase, double tolerance)
-  {
-    std::vector<size_t> testDims(3,1);
-    std::vector<double> testData = testObject.loadTestFieldData(dataSetName,testDims);
-    for (size_t i = 0; i < testDims[0]; i++)
-      {
-	for (size_t j = 0; j < testDims[1]; j++)
-	  {
-	    for (size_t k = 0; k < testDims[2]; k++)
-	      {
-		double value = constant + amplitude*std::sin(kx*i+ky*j+kz*k+phase);
-		size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k;
-		wrapperEqual(i,j,k,dataSetName,testData.at(index),value,tolerance);
-	      }
-	  }
-      }
-  }
+  return distance;
+}
+// =========================================================================
 
+// =========================================================================
+bool nearlyEqualDbl(double const &a, double const &b, double &absoluteDiff, int64_t &ulpsDiff,
+                    double const &fixedEpsilon,  // = 1E-14 by default
+                    int64_t const &ulpsEpsilon)  // = 4 by default
+{
+  // Compute differences
+  ulpsDiff     = ulpsDistanceDbl(a, b);
+  absoluteDiff = std::abs(a - b);
 
+  // Perform the ULP check which is for numbers far from zero and perform the absolute check which is for numbers near
+  // zero
+  return ulpsDiff <= ulpsEpsilon or absoluteDiff <= fixedEpsilon;
+}
+// =========================================================================
 
+void wrapperEqual(int i, int j, int k, std::string const &dataSetName, double test_value, double fid_value,
+                  double fixedEpsilon = 5.0E-12)
+{
+  std::string outString;
+  outString += dataSetName;
+  outString += " dataset at [";
+  outString += std::to_string(i);
+  outString += ",";
+  outString += std::to_string(j);
+  outString += ",";
+  outString += std::to_string(k);
+  outString += "]";
+
+  ASSERT_NO_FATAL_FAILURE(Check_Results<1>(fid_value, test_value, outString, fixedEpsilon));
+}
 
+void analyticConstant(system_test::SystemTestRunner testObject, std::string const &dataSetName, double value)
+{
+  std::vector<size_t> testDims(3, 1);
+  std::vector<double> testData = testObject.loadTestFieldData(dataSetName, testDims);
+  for (size_t i = 0; i < testDims[0]; i++) {
+    for (size_t j = 0; j < testDims[1]; j++) {
+      for (size_t k = 0; k < testDims[2]; k++) {
+        size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k;
+
+        ASSERT_NO_FATAL_FAILURE(wrapperEqual(i, j, k, dataSetName, testData.at(index), value));
+      }
+    }
+  }
 }
+
+void analyticSine(system_test::SystemTestRunner testObject, std::string const &dataSetName, double constant,
+                  double amplitude, double kx, double ky, double kz, double phase, double tolerance)
+{
+  std::vector<size_t> testDims(3, 1);
+  std::vector<double> testData = testObject.loadTestFieldData(dataSetName, testDims);
+  for (size_t i = 0; i < testDims[0]; i++) {
+    for (size_t j = 0; j < testDims[1]; j++) {
+      for (size_t k = 0; k < testDims[2]; k++) {
+        double value = constant + amplitude * std::sin(kx * i + ky * j + kz * k + phase);
+        size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k;
+        ASSERT_NO_FATAL_FAILURE(wrapperEqual(i, j, k, dataSetName, testData.at(index), value, tolerance));
+      }
+    }
+  }
+}
+
+}  // namespace testing_utilities
diff --git a/src/utils/testing_utilities.h b/src/utils/testing_utilities.h
index 927a61f28..31258e676 100644
--- a/src/utils/testing_utilities.h
+++ b/src/utils/testing_utilities.h
@@ -9,8 +9,12 @@
 #pragma once
 
 // STL includes
+#include <iomanip>
+#include <limits>
+#include <sstream>
 #include <string>
-#include "../system_tests/system_tester.h" // provide systemTest class
+
+#include "../system_tests/system_tester.h"  // provide systemTest class
 
 // =============================================================================
 // NOTE: Global variables are declared as extern at the end of this file
@@ -22,143 +26,176 @@
  * considered compatible with CUDA/HIP.
  *
  */
-namespace testingUtilities
+namespace testing_utilities
 {
-    // =========================================================================
-    /*!
-     * \brief Compute the Units in the Last Place (ULP) difference between two doubles
-     *
-     * \details This function is modified from
-     * [Comparing Floating-Point Numbers Is Tricky by Matt Kline](https://bitbashing.io/comparing-floats.html)
-     * which is in turn based on
-     * [Comparing Floating Point Numbers, 2012 Edition by Bruce Dawson](https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/).
-     * The latter seems to be the bible of floating point comparison and is the
-     * basis of Googletests ASSERT_DOUBLE_EQ assertion.
-     *
-     * This particular function checks that the two numbers if the numbers are
-     * perfectly equal, +0, -0, Nan, inf, or differently signed then it computes
-     * the ULP difference between them are returns it
-     *
-     * \param[in] a The first double you wish to compare. Order doesn't matter.
-     * \param[in] b The second double you wish to compare. Order doesn't matter.
-     * \return int64_t The ULP distance between a and b.
-     */
-    int64_t ulpsDistanceDbl(double const &a, double const &b);
-    // =========================================================================
-
-    // =========================================================================
-    /*!
-     * \brief Check if two doubles are nearly equal.
-     *
-     * \details This function checks if two doubles are "nearly equal" which is
-     * defined as either: A) the absolute difference between them is less than
-     * the fixedEpsilon argument or B) the units in the last place (ULP)
-     * difference is less  than the ulpsEpsilon argument. Both of the epsilon
-     * arguments have default values which generally should not need to be
-     * changed.
-     *
-     * Why does fixedEpsilon default to 1E-14? Running the Sod shock tube when
-     * Cholla was compiled with GCC 9.3.0 vs. XL 16.1.1-10 on Summit lead to
-     * absolute differences in the results up to 1.77636E-15. A priori we chose
-     * that a difference between two numbers that was less than one order of
-     * magnitude greater than the difference between compilers would be
-     * considered "equal". I.e. since the maximum absolute error between the GCC
-     * and XL compilers was ~1.7E-15 our allowed margin of error should be
-     * ~1E-14.
-     *
-     * Why does ulpsEpsilon default to 4? Repeating the test above I computed
-     * the largest ULP difference that wasn't caught by the absolute difference
-     * requirement of 1E-14. It turns out that there were no uncaught
-     * differences at all so I kept ulpsEpsilon at 4 since that's the Googletest
-     * default for their floating point assertions
-     *
-     * \param[in] a The first double you wish to compare. Order doesn't matter.
-     * \param[in] b The first double you wish to compare. Order doesn't matter.
-     * \param[out] absoluteDiff The absolute difference between the numbers.
-     * Only returned if the numbers are not equal. If the numbers are equal then
-     * behaviour is undefined
-     * \param[out] ulpsDiff The ULP difference between the numbers.
-     * Only returned if the numbers are not equal. If the numbers are equal then
-     * behaviour is undefined
-     * \param[in] fixedEpsilon The allowed difference in real numbers. Defaults
-     * to 1E-14
-     * \param[in] ulpsEpsilon The allowed difference of ULPs. Defaults to 4
-     * \return bool Whether or not the numbers are equal
-     */
-    bool nearlyEqualDbl(double  const &a,
-                        double  const &b,
-                        double  &absoluteDiff,
-                        int64_t &ulpsDiff,
-                        double  const &fixedEpsilon = 1E-14,
-                        int     const &ulpsEpsilon  = 4);
-    // =========================================================================
-
-    void wrapperEqual(int i, int j, int k, std::string dataSetName, double test_value, double fid_value, double fixedEpsilon);
-
-    void analyticConstant(systemTest::SystemTestRunner testObject, std::string dataSetName, double value);
-
-    void analyticSine(systemTest::SystemTestRunner testObject, std::string dataSetName,
-		      double constant, double amplitude, double kx, double ky, double kz,
-		      double phase, double tolerance);
-
-    // =========================================================================
-    /*!
-     * \brief A simple function to compare two doubles with the nearlyEqualDbl
-     * function, perform a GTest assert on the result, and print out the values
-     *
-     * \param[in] fiducialNumber The fiducial number to test against
-     * \param[in] testNumber The unverified number to test
-     * \param[in] outString A string to be printed in the first line of the output
-     * message. Format will be "Difference in outString"
-     * \param[in] fixedEpsilon The fixed epsilon to use in the comparison.
-     * Negative values are ignored and default behaviour is used
-     * \param[in] ulpsEpsilon The ULP epsilon to use in the comparison. Negative
-     * values are ignored and default behaviour is used
-     */
-    void checkResults(double fiducialNumber,
-                      double testNumber,
-                      std::string outString,
-                      double fixedEpsilon = -999,
-                      int ulpsEpsilon = -999);
-    // =========================================================================
-
-    // =========================================================================
-    /*!
-     * \brief Holds a single std::string that's intended to be read only and
-     * global. Use for storing the path of the root directory of Cholla
-     *
-     */
-    class GlobalString
-    {
-    private:
-        /// The path variable
-        std::string _string;
-    public:
-        /*!
-         * \brief Initializes the _path member variable. Should only be called
-         * once in main
-         *
-         * \param inputPath The path to be store in _path
-         */
-        void init(std::string const &inputPath) {_string = inputPath;};
-
-        /*!
-         * \brief Get the String object
-         *
-         * \return std::string The string variable
-         */
-        std::string getString() {return _string;};
-        GlobalString()  = default;
-        ~GlobalString() = default;
-    };
-    // =========================================================================
+// =========================================================================
+/*!
+ * \brief Compute the Units in the Last Place (ULP) difference between two
+ * doubles
+ *
+ * \details This function is modified from
+ * [Comparing Floating-Point Numbers Is Tricky by Matt
+ * Kline](https://bitbashing.io/comparing-floats.html) which is in turn based on
+ * [Comparing Floating Point Numbers, 2012 Edition by Bruce
+ * Dawson](https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/).
+ * The latter seems to be the bible of floating point comparison and is the
+ * basis of Googletests ASSERT_DOUBLE_EQ assertion.
+ *
+ * This particular function checks that the two numbers if the numbers are
+ * perfectly equal, +0, -0, Nan, inf, or differently signed then it computes
+ * the ULP difference between them are returns it
+ *
+ * \param[in] a The first double you wish to compare. Order doesn't matter.
+ * \param[in] b The second double you wish to compare. Order doesn't matter.
+ * \return int64_t The ULP distance between a and b.
+ */
+int64_t ulpsDistanceDbl(double const &a, double const &b);
+// =========================================================================
+
+// =========================================================================
+/*!
+ * \brief Check if two doubles are nearly equal.
+ *
+ * \details This function checks if two doubles are "nearly equal" which is
+ * defined as either: A) the absolute difference between them is less than
+ * the fixedEpsilon argument or B) the units in the last place (ULP)
+ * difference is less  than the ulpsEpsilon argument. Both of the epsilon
+ * arguments have default values which generally should not need to be
+ * changed.
+ *
+ * Why does fixedEpsilon default to 1E-14? Running the Sod shock tube when
+ * Cholla was compiled with GCC 9.3.0 vs. XL 16.1.1-10 on Summit lead to
+ * absolute differences in the results up to 1.77636E-15. A priori we chose
+ * that a difference between two numbers that was less than one order of
+ * magnitude greater than the difference between compilers would be
+ * considered "equal". I.e. since the maximum absolute error between the GCC
+ * and XL compilers was ~1.7E-15 our allowed margin of error should be
+ * ~1E-14.
+ *
+ * Why does ulpsEpsilon default to 4? Repeating the test above I computed
+ * the largest ULP difference that wasn't caught by the absolute difference
+ * requirement of 1E-14. It turns out that there were no uncaught
+ * differences at all so I kept ulpsEpsilon at 4 since that's the Googletest
+ * default for their floating point assertions
+ *
+ * \param[in] a The first double you wish to compare. Order doesn't matter.
+ * \param[in] b The first double you wish to compare. Order doesn't matter.
+ * \param[out] absoluteDiff The absolute difference between the numbers.
+ * Only returned if the numbers are not equal. If the numbers are equal then
+ * behaviour is undefined
+ * \param[out] ulpsDiff The ULP difference between the numbers.
+ * Only returned if the numbers are not equal. If the numbers are equal then
+ * behaviour is undefined
+ * \param[in] fixedEpsilon The allowed difference in real numbers. Defaults
+ * to 1E-14
+ * \param[in] ulpsEpsilon The allowed difference of ULPs. Defaults to 4
+ * \return bool Whether or not the numbers are equal
+ */
+bool nearlyEqualDbl(double const &a, double const &b, double &absoluteDiff, int64_t &ulpsDiff,
+                    double const &fixedEpsilon = 1E-14, int64_t const &ulpsEpsilon = 4);
+// =========================================================================
+
+void wrapperEqual(int i, int j, int k, std::string const &dataSetName, double test_value, double fid_value,
+                  double fixedEpsilon);
+
+void analyticConstant(system_test::SystemTestRunner testObject, std::string const &dataSetName, double value);
+
+void analyticSine(system_test::SystemTestRunner testObject, std::string const &dataSetName, double constant,
+                  double amplitude, double kx, double ky, double kz, double phase, double tolerance);
+
+// =========================================================================
+/*!
+ * \brief A simple function to compare two doubles with the nearlyEqualDbl
+ * function, perform a GTest assert on the result, and print out the values
+ *
+ * \tparam checkType The type of GTest assertion to use. "0" for and
+ * "EXPECT" and "1" for an "ASSERT"
+ * \param[in] fiducialNumber The fiducial number to test against
+ * \param[in] testNumber The unverified number to test
+ * \param[in] outString A string to be printed in the first line of the output
+ * message. Format will be "Difference in outString"
+ * \param[in] fixedEpsilon The fixed epsilon to use in the comparison.
+ * Negative values are ignored and default behaviour is used
+ * \param[in] ulpsEpsilon The ULP epsilon to use in the comparison. Negative
+ * values are ignored and default behaviour is used
+ */
+template <int checkType = 0>
+void Check_Results(double fiducialNumber, double testNumber, std::string const &outString, double fixedEpsilon = -999,
+                   int64_t ulpsEpsilon = -999)
+{
+  // Check for equality and if not equal return difference
+  double absoluteDiff;
+  int64_t ulpsDiff;
+  bool areEqual;
+
+  if ((fixedEpsilon < 0) and (ulpsEpsilon < 0)) {
+    areEqual = testing_utilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff);
+  } else if ((fixedEpsilon > 0) and (ulpsEpsilon < 0)) {
+    areEqual = testing_utilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff, fixedEpsilon);
+  } else {
+    areEqual = testing_utilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff, fixedEpsilon,
+                                                 ulpsEpsilon);
+  }
+
+  std::stringstream outputMessage;
+  outputMessage << std::setprecision(std::numeric_limits<double>::max_digits10) << "Difference in " << outString
+                << std::endl
+                << "The fiducial value is:       " << fiducialNumber << std::endl
+                << "The test value is:           " << testNumber << std::endl
+                << "The absolute difference is:  " << absoluteDiff << std::endl
+                << "The ULP difference is:       " << ulpsDiff << std::endl;
+
+  if (checkType == 0) {
+    EXPECT_TRUE(areEqual) << outputMessage.str();
+  } else if (checkType == 1) {
+    ASSERT_TRUE(areEqual) << outputMessage.str();
+  } else {
+    throw std::runtime_error(
+        "Incorrect template argument passed to "
+        "Check_Results. Options are 0 and 1 but " +
+        std::to_string(checkType) + " was passed");
+  }
 }
+// =========================================================================
+
+// =========================================================================
+/*!
+ * \brief Holds a single std::string that's intended to be read only and
+ * global. Use for storing the path of the root directory of Cholla
+ *
+ */
+class GlobalString
+{
+ private:
+  /// The path variable
+  std::string _string;
+
+ public:
+  /*!
+   * \brief Initializes the _path member variable. Should only be called
+   * once in main
+   *
+   * \param inputPath The path to be store in _path
+   */
+  void init(std::string const &inputPath) { _string = inputPath; };
+
+  /*!
+   * \brief Get the String object
+   *
+   * \return std::string The string variable
+   */
+  std::string getString() { return _string; };
+  GlobalString()  = default;
+  ~GlobalString() = default;
+};
+// =========================================================================
+}  // namespace testing_utilities
 
 // Declare the global string variables so everything that imports this file
 // has access to them
-extern testingUtilities::GlobalString globalChollaRoot;
-extern testingUtilities::GlobalString globalChollaBuild;
-extern testingUtilities::GlobalString globalChollaMachine;
-extern testingUtilities::GlobalString globalMpiLauncher;
+extern testing_utilities::GlobalString globalChollaRoot;
+extern testing_utilities::GlobalString globalChollaBuild;
+extern testing_utilities::GlobalString globalChollaMachine;
+extern testing_utilities::GlobalString globalMpiLauncher;
 extern bool globalRunCholla;
 extern bool globalCompareSystemTestResults;
diff --git a/src/utils/timing_functions.cpp b/src/utils/timing_functions.cpp
index 895c12806..a0382e43f 100644
--- a/src/utils/timing_functions.cpp
+++ b/src/utils/timing_functions.cpp
@@ -1,47 +1,88 @@
-
+#include "../utils/timing_functions.h"
 #ifdef CPU_TIME
 
-#include "../utils/timing_functions.h"
-#include "../io/io.h"
-#include <iostream>
-#include <fstream>
-#include <string>
+  #include <algorithm>
+  #include <fstream>
+  #include <iostream>
+  #include <string>
 
-#ifdef MPI_CHOLLA
-#include "../mpi/mpi_routines.h"
-#endif
+  #include "../global/global.h"
+  #include "../global/global_cuda.h"
+  #include "../io/io.h"
+
+  #ifdef MPI_CHOLLA
+    #include "../mpi/mpi_routines.h"
+  #endif
 
-void OneTime::Start(){
-  if (inactive) return;
-  time_start = get_time();
+void OneTime::Start()
+{
+  cudaDeviceSynchronize();
+  if (inactive) {
+    return;
+  }
+  time_start = Get_Time();
 }
 
-void OneTime::Subtract(Real time_to_subtract){
-  // Add the time_to_substract to the start time, that way the time_end - time_start is reduced by time_to_substract
+void OneTime::Subtract(Real time_to_subtract)
+{
+  // Add the time_to_substract to the start time, that way the time_end -
+  // time_start is reduced by time_to_substract
   time_start += time_to_subtract;
 }
 
-void OneTime::End(){
-  if (inactive) return;
-  Real time_end = get_time();
-  Real time = (time_end - time_start)*1000;
+void OneTime::End(bool const print_high_values)
+{
+  cudaDeviceSynchronize();
+  if (inactive) {
+    return;
+  }
+  Real time_end = Get_Time();
+  Real time     = (time_end - time_start) * 1000;
 
-#ifdef MPI_CHOLLA
+  #ifdef MPI_CHOLLA
   t_min = ReduceRealMin(time);
   t_max = ReduceRealMax(time);
   t_avg = ReduceRealAvg(time);
-#else
+  #else
   t_min = time;
   t_max = time;
   t_avg = time;
-#endif
-  if (n_steps > 0) t_all += t_max;
+  #endif
+  if (n_steps > 0) {
+    t_all += t_max;
+  }
   n_steps++;
-}
 
+  #ifdef MPI_CHOLLA
+  // Print out information if the process is unusually slow
+  if ((time >= 1.1 * t_avg) and (n_steps > 0) and print_high_values) {
+    // Get node ID
+    std::string node_id(MPI_MAX_PROCESSOR_NAME, ' ');
+    int length;
+    MPI_Get_processor_name(node_id.data(), &length);
+    node_id.resize(length);
+
+    // Get GPU ID
+    std::string gpu_id(MPI_MAX_PROCESSOR_NAME, ' ');
+    int device;
+    GPU_Error_Check(cudaGetDevice(&device));
+    GPU_Error_Check(cudaDeviceGetPCIBusId(gpu_id.data(), gpu_id.size(), device));
+    gpu_id.erase(
+        std::find_if(gpu_id.rbegin(), gpu_id.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base(),
+        gpu_id.end());
+
+    std::cerr << "WARNING: Rank took longer than expected to execute." << std::endl
+              << "         Node Time: " << time << std::endl
+              << "         Avg Time: " << t_avg << std::endl
+              << "         Node ID: " << node_id << std::endl
+              << "         GPU PCI Bus ID: " << gpu_id << std::endl;
+  }
+  #endif  // MPI_CHOLLA
+}
 
-void OneTime::RecordTime( Real time ){
-  time *=  1000; //Convert from secs to ms
+void OneTime::RecordTime(Real time)
+{
+  time *= 1000;  // Convert from secs to ms
   #ifdef MPI_CHOLLA
   t_min = ReduceRealMin(time);
   t_max = ReduceRealMax(time);
@@ -51,27 +92,30 @@ void OneTime::RecordTime( Real time ){
   t_max = time;
   t_avg = time;
   #endif
-  if (n_steps > 0) t_all += t_max;
+  if (n_steps > 0) {
+    t_all += t_max;
+  }
   n_steps++;
 }
 
-
-void OneTime::PrintStep(){
+void OneTime::PrintStep()
+{
   chprintf(" Time %-19s min: %9.4f  max: %9.4f  avg: %9.4f   ms\n", name, t_min, t_max, t_avg);
 }
 
-void OneTime::PrintAverage(){
-  if (n_steps > 1) chprintf(" Time %-19s avg: %9.4f   ms\n", name, t_all/(n_steps-1));
-}
-
-void OneTime::PrintAll(){
-  chprintf(" Time %-19s all: %9.4f   ms\n", name, t_all);
+void OneTime::PrintAverage()
+{
+  if (n_steps > 1) {
+    chprintf(" Time %-19s avg: %9.4f   ms\n", name, t_all / (n_steps - 1));
+  }
 }
 
-Time::Time( void ){}
+void OneTime::PrintAll() { chprintf(" Time %-19s all: %9.4f   ms\n", name, t_all); }
 
-void Time::Initialize(){
+Time::Time(void) {}
 
+void Time::Initialize()
+{
   n_steps = 0;
 
   // Add or remove timers by editing this list, keep TOTAL at the end
@@ -79,62 +123,72 @@ void Time::Initialize(){
   // add Timer.NAME.Start() and Timer.NAME.End() where appropriate.
 
   onetimes = {
-    #ifdef PARTICLES
-    &(Calc_dt = OneTime("Calc_dt")),
-    #endif
-    &(Hydro = OneTime("Hydro")),
-    &(Boundaries = OneTime("Boundaries")),
-    #ifdef GRAVITY
-    &(Grav_Potential = OneTime("Grav_Potential")),
-    &(Pot_Boundaries = OneTime("Pot_Boundaries")),
-    #endif
-    #ifdef PARTICLES
-    &(Part_Density = OneTime("Part_Density")),
-    &(Part_Boundaries = OneTime("Part_Boundaries")),
-    &(Part_Dens_Transf = OneTime("Part_Dens_Transf")),
-    &(Advance_Part_1 = OneTime("Advance_Part_1")),
-    &(Advance_Part_2 = OneTime("Advance_Part_2")),
-    #endif
-    #ifdef COOLING_GRACKLE
-    &(Cooling = OneTime("Cooling")),
-    #endif
-    #ifdef CHEMISTRY_GPU
-    &(Chemistry = OneTime("Chemistry")),
+  #ifdef PARTICLES
+      &(Calc_dt = OneTime("Calc_dt")),
+  #endif
+      &(Hydro_Integrator = OneTime("Hydro_Integrator")),
+      &(Hydro = OneTime("Hydro")),
+      &(Boundaries = OneTime("Boundaries")),
+  #ifdef GRAVITY
+      &(Grav_Potential = OneTime("Grav_Potential")),
+      &(Pot_Boundaries = OneTime("Pot_Boundaries")),
+  #endif
+  #ifdef PARTICLES
+      &(Part_Density = OneTime("Part_Density")),
+      &(Part_Boundaries = OneTime("Part_Boundaries")),
+      &(Part_Dens_Transf = OneTime("Part_Dens_Transf")),
+      &(Advance_Part_1 = OneTime("Advance_Part_1")),
+      &(Advance_Part_2 = OneTime("Advance_Part_2")),
+  #endif
+  #ifdef COOLING_GPU
+      &(Cooling_GPU = OneTime("Cooling_GPU")),
+  #endif
+  #ifdef COOLING_GRACKLE
+      &(Cooling_Grackle = OneTime("Cooling_Grackle")),
+  #endif
+  #ifdef CHEMISTRY_GPU
+      &(Chemistry = OneTime("Chemistry")),
+  #endif
+  #ifdef SUPERNOVA
+      &(Feedback = OneTime("Feedback")),
+    #ifdef ANALYSIS
+      &(FeedbackAnalysis = OneTime("FeedbackAnalysis")),
     #endif
-    &(Total = OneTime("Total")),
+  #endif  // SUPERNOVA
+      &(Total = OneTime("Total")),
   };
 
-
-  chprintf( "\nTiming Functions is ON \n");
-
+  chprintf("\nTiming Functions is ON \n");
 }
 
-void Time::Print_Times(){
-  for (OneTime* x : onetimes){
+void Time::Print_Times()
+{
+  for (OneTime* x : onetimes) {
     x->PrintStep();
   }
 }
 
 // once at end of run in main.cpp
-void Time::Print_Average_Times( struct parameters P ){
-
+void Time::Print_Average_Times(struct Parameters P)
+{
   chprintf("\nAverage Times      n_steps:%d\n", n_steps);
 
-  for (OneTime* x : onetimes){
+  for (OneTime* x : onetimes) {
     x->PrintAverage();
   }
 
-  std::string file_name ( "run_timing.log" );
-  std::string header;
+  std::string file_name("run_timing.log");
 
-  chprintf( "Writing timing values to file: %s  \n", file_name.c_str());
+  chprintf("Writing timing values to file: %s  \n", file_name.c_str());
 
-  std::string gitHash    = "Git Commit Hash = " + std::string(GIT_HASH)    + std::string("\n");
-  std::string macroFlags = "Macro Flags     = " + std::string(MACRO_FLAGS) + std::string("\n\n");
+  std::string header = "Git Commit Hash = " + std::string(GIT_HASH) + std::string("\n");
+  header += "Macro Flags     = " + std::string(MACRO_FLAGS) + std::string("\n");
+  header += "Note that the timers all skip the first time step since it always takes longer." + std::string("\n") +
+            "To find the average time divide the time shown by n_steps-1" + std::string("\n");
 
-  header = "#n_proc  nx  ny  nz  n_omp  n_steps  ";
+  header += std::string("\n") + "#n_proc  nx  ny  nz  n_omp  n_steps  ";
 
-  for (OneTime* x : onetimes){
+  for (OneTime* x : onetimes) {
     header += x->name;
     header += "  ";
   }
@@ -142,26 +196,25 @@ void Time::Print_Average_Times( struct parameters P ){
   header += " \n";
 
   bool file_exists = false;
-  if (FILE *file = fopen(file_name.c_str(), "r")){
+  if (FILE* file = fopen(file_name.c_str(), "r")) {
     file_exists = true;
-    chprintf( " File exists, appending values: %s \n", file_name.c_str() );
-    fclose( file );
-  } else{
-    chprintf( " Creating File: %s \n", file_name.c_str() );
+    chprintf(" File exists, appending values: %s \n", file_name.c_str());
+    fclose(file);
+  } else {
+    chprintf(" Creating File: %s \n", file_name.c_str());
   }
 
   #ifdef MPI_CHOLLA
-  if ( procID != 0 ) return;
+  if (procID != 0) {
+    return;
+  }
   #endif
 
   std::ofstream out_file;
 
-// Output timing values
+  // Output timing values
   out_file.open(file_name.c_str(), std::ios::app);
-  if ( !file_exists )
-  {
-    out_file << gitHash;
-    out_file << macroFlags;
+  if (!file_exists) {
     out_file << header;
   }
   #ifdef MPI_CHOLLA
@@ -177,15 +230,40 @@ void Time::Print_Average_Times( struct parameters P ){
   #endif
   out_file << n_steps << " ";
 
-  for (OneTime* x : onetimes){
+  for (OneTime* x : onetimes) {
     out_file << x->t_all << " ";
   }
 
   out_file << "\n";
   out_file.close();
 
-  chprintf( "Saved Timing: %s \n\n", file_name.c_str() );
-
+  chprintf("Saved Timing: %s \n\n", file_name.c_str());
 }
 
+#endif  // CPU_TIME
+
+ScopedTimer::ScopedTimer(const char* input_name)
+{
+#ifdef CPU_TIME
+  name       = input_name;
+  time_start = Get_Time();
 #endif
+}
+
+ScopedTimer::~ScopedTimer(void)
+{
+#ifdef CPU_TIME
+  double time_elapsed_ms = (Get_Time() - time_start) * 1000;
+
+  #ifdef MPI_CHOLLA
+  double t_min = ReduceRealMin(time_elapsed_ms);
+  double t_max = ReduceRealMax(time_elapsed_ms);
+  double t_avg = ReduceRealAvg(time_elapsed_ms);
+  #else
+  double t_min = time_elapsed_ms;
+  double t_max = time_elapsed_ms;
+  double t_avg = time_elapsed_ms;
+  #endif  // MPI_CHOLLA
+  chprintf("ScopedTimer Min: %9.4f ms Max: %9.4f ms Avg: %9.4f ms %s \n", t_min, t_max, t_avg, name);
+#endif  // CPU_TIME
+}
diff --git a/src/utils/timing_functions.h b/src/utils/timing_functions.h
index d11db642a..96cceea15 100644
--- a/src/utils/timing_functions.h
+++ b/src/utils/timing_functions.h
@@ -1,48 +1,51 @@
-#ifdef CPU_TIME
 #ifndef TIMING_FUNCTIONS_H
 #define TIMING_FUNCTIONS_H
 
 #include <vector>
-#include "../global/global.h"
 
-// Each instance of this class represents a single timer, timing a single section of code. 
-// All instances have their own n_steps, time_start, etc. so that all timers can run independently
+#include "../global/global.h"  // Provides Real, Get_Time
+
+// #ifdef CPU_TIME
+//  Each instance of this class represents a single timer, timing a single
+//  section of code. All instances have their own n_steps, time_start, etc. so
+//  that all timers can run independently
 class OneTime
 {
  public:
   const char* name;
-  int n_steps = 0;
-  Real time_start;
-  Real t_min;
-  Real t_max;
-  Real t_avg;
-  Real t_all=0;
-  bool inactive=true;
-  OneTime(void){
-  }
-  OneTime(const char* input_name){
-    name = input_name;
-    inactive=false;
+  int n_steps     = 0;
+  Real time_start = 0;
+  Real t_min      = 0;
+  Real t_max      = 0;
+  Real t_avg      = 0;
+  Real t_all      = 0;
+  bool inactive   = true;
+  OneTime(void) {}
+  OneTime(const char* input_name)
+  {
+    name     = input_name;
+    inactive = false;
   }
   void Start();
   void Subtract(Real time_to_subtract);
-  void End();
+  void End(bool const print_high_values = false);
   void PrintStep();
   void PrintAverage();
   void PrintAll();
-  void RecordTime( Real time );
+  void RecordTime(Real time);
 };
 
-// Time loops through instances of OneTime. onetimes is initialized with pointers to each timer. 
+// Time loops through instances of OneTime. onetimes is initialized with
+// pointers to each timer.
 //
 class Time
 {
-public:
-
+ public:
   int n_steps;
 
   OneTime Total;
   OneTime Calc_dt;
+  OneTime Hydro_Integrator;
   OneTime Hydro;
   OneTime Boundaries;
   OneTime Grav_Potential;
@@ -52,18 +55,34 @@ class Time
   OneTime Part_Dens_Transf;
   OneTime Advance_Part_1;
   OneTime Advance_Part_2;
-  OneTime Cooling;
+  OneTime Cooling_GPU;
+  OneTime Cooling_Grackle;
   OneTime Chemistry;
-    
+  OneTime Feedback;
+  OneTime FeedbackAnalysis;
+
   std::vector<OneTime*> onetimes;
-  
+
   Time();
   void Initialize();
   void Print_Times();
-  void Print_Average_Times( struct parameters P );
-  
+  void Print_Average_Times(struct Parameters P);
 };
+// #endif  // CPU_TIME
 
+// ScopedTimer does nothing if CPU_TIME is disabled
+/* \brief ScopedTimer helps time a scope. Initialize as first variable and C++ guarantees it is destroyed last */
+class ScopedTimer
+{
+ public:
+  const char* name;
+  double time_start = 0;
+
+  /* \brief ScopedTimer Constructor initializes name and time */
+  ScopedTimer(const char* input_name);
+
+  /* \brief ScopedTimer Destructor computes dt and prints */
+  ~ScopedTimer(void);
+};
 
-#endif
-#endif //CPU_TIME
+#endif  // TIMING_FUNCTIONS_H
diff --git a/tools/analyze_tidy_checks.py b/tools/analyze_tidy_checks.py
new file mode 100755
index 000000000..de5c86313
--- /dev/null
+++ b/tools/analyze_tidy_checks.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python3
+"""
+================================================================================
+ This script analyzes the clang-tidy output and produces an ordered list of all
+ the checks run, how many failures a check generated and the percentage of
+ failures a check represents.
+
+ When running, make sure that you have already run clang-tidy with all the
+ checks you want enabled since this script looks for the 2 tidy_results_*.log
+ files in the root directory of Cholla
+================================================================================
+"""
+
+import numpy as np
+import pandas as pd
+import pathlib
+import subprocess
+
+
+def main():
+    # Determine path to Cholla directory
+    chollaPath = pathlib.Path(__file__).resolve().parent.parent
+
+    # Load required data
+    tidyResults = loadTidyResults(chollaPath)
+    enabledChecks = getEnabledChecks(chollaPath)
+
+    # Count and sort the errors
+    sortedChecks, totalWarnings, numPassing, numFailing = countAndSort(
+        tidyResults, enabledChecks
+    )
+
+    # Print Results in markdown format
+    printResults(sortedChecks, totalWarnings, numPassing, numFailing)
+
+
+def loadTidyResults(chollaPath):
+    with open(chollaPath / "tidy_results_cpp.log", "r") as file:
+        cppData = file.read()
+    with open(chollaPath / "tidy_results_gpu.log", "r") as file:
+        gpuData = file.read()
+
+    return cppData + gpuData
+
+
+def getEnabledChecks(chollaPath):
+    stdout = subprocess.run(
+        ["clang-tidy", "--list-checks"], cwd=chollaPath, stdout=subprocess.PIPE
+    ).stdout.decode("utf-8")
+
+    # find index where checks start
+    stdout = stdout.split()
+    for i in range(len(stdout)):
+        if "bugprone" in stdout[i]:
+            index = i
+            break
+
+    return stdout[index:]
+
+
+def countAndSort(tidyResults, enabledChecks):
+    passingChecks = 0
+    failingChecks = 0
+    numWarnings = np.zeros(len(enabledChecks))
+
+    for i, check in enumerate(enabledChecks):
+        numWarnings[i] = tidyResults.count(check)
+        if check in tidyResults:
+            failingChecks += 1
+        else:
+            passingChecks += 1
+
+    # Convert to dataframe and sort
+    sortedChecks = sorted(list(zip(numWarnings, enabledChecks)))
+    sortedChecks.reverse()
+    totalWarnings = numWarnings.sum()
+
+    return sortedChecks, totalWarnings, passingChecks, failingChecks
+
+
+def printResults(sortedChecks, totalWarnings, numPassing, numFailing):
+    # Determine percentages
+    totalChecks = numPassing + numFailing
+
+    print(f"Total number of warnings: {int(totalWarnings)}")
+    print(f"{round(numPassing/totalChecks*100, 2)}% of checks passing")
+    print(f"{round(numFailing/totalChecks*100, 2)}% of checks failing")
+
+    col1Title = "Number of Warnings"
+    col2Title = "Percentage of Warnings"
+    col3Title = "Check"
+    col3Length = np.max([len(entry[1]) for entry in sortedChecks])
+
+    print()
+    print("Failing Checks:")
+    print(f"| {col1Title} | {col2Title} | {col3Title:{col3Length}} |")
+    print(f'| {"-"*len(col1Title)} | {"-"*len(col2Title)} | {"-"*col3Length} |')
+    for entry in sortedChecks:
+        if int(entry[0]) != 0:
+            print(
+                f"| {int(entry[0]):18} | {(entry[0] / totalWarnings)*100:22.2f} | {entry[1]:{col3Length}} |"
+            )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/cholla-nv-compute-sanitizer.sh b/tools/cholla-nv-compute-sanitizer.sh
new file mode 100755
index 000000000..ece87b3e0
--- /dev/null
+++ b/tools/cholla-nv-compute-sanitizer.sh
@@ -0,0 +1,105 @@
+#!/usr/bin/env bash
+
+# Utility script for running the NVIDIA Compute Sanitizer.
+# The Compute Sanitizer provides 4 tool:
+# - Memcheck: The memory access error and leak detection tool.
+# - Racecheck: The shared memory data access hazard detection tool.
+# - Initcheck: The uninitialized device global memory access detection tool.
+# - Synccheck: The thread synchronization hazard detection tool.
+#
+# See the NVIDIA docs for more detail:
+# https://docs.nvidia.com/compute-sanitizer/ComputeSanitizer/index.html
+#
+# Syntax: compute-sanitizer [options] app_name [app_options]
+#
+# Compilation: Benefits from -G and -lineinfo. -Xcompiler -rdynamic for backtraces
+
+# Memcheck args
+# --leak-check full/no (default: no) full = info about memory leaks
+# --padding NUM, puts padding around arrays to improve out-of-bounds checking.
+# NUM is The size of the pad in bytes, we should probably pad at least a couple
+# of doubles, say 8 so pad=8*8=64
+#
+# initcheck args
+# --track-unused-memory yes/no (default: no) Check for unused memory allocations.
+#
+# Racecheck args
+# - --print-level info
+
+
+#set -x #echo all commands
+while getopts "t:h" opt; do
+    case $opt in
+        t)  # Set the tool to use
+            case ${OPTARG} in
+                m)
+                    tool="memcheck"
+                    tool_args="--leak-check full --padding 64 --report-api-errors all"
+                    ;;
+                r)
+                    tool="racecheck"
+                    tool_args="--print-level info"
+                    ;;
+                i)
+                    tool="initcheck"
+                    tool_args="--track-unused-memory yes"
+                    ;;
+                s)
+                    tool="synccheck"
+                    tool_args=""
+                    ;;
+            esac
+            ;;
+        h)  # Print help
+            echo -e "
+While not required the following compile flags can help: -G for debug builds,
+-lineinfo for performance builds (can't be used with -G) and -Xcompiler -rdynamic
+is useful for backtraces in all builds.
+
+Options:
+-t m/r/i/s: Selects the tool to use.
+    m: runs the memcheck tool
+    r: runs the racecheck tool
+    i: runs the initcheck tool
+    s: runs the synccheck tool
+-h: This dialogue"
+            exit 0
+            ;;
+        \?)
+            echo "Invalid option: -${OPTARG}" >&2
+            exit 1
+            ;;
+        :)
+            echo "Option -${OPTARG} requires an argument." >&2
+            exit 1
+            ;;
+    esac
+done
+
+# Exit if no tool was selected
+if [ -z "$tool" ]; then
+        echo 'Missing tool argument' >&2
+        exit 1
+fi
+
+# Get Paths
+cholla_root="$(dirname "$(dirname "$(readlink -fm "$0")")")"
+cholla_exe=$(find "${cholla_root}" -name cholla.*)
+cholla_parameter_file="${cholla_root}/examples/3D/sod.txt"
+COMPUTE_SANITIZER=$(which compute-sanitizer)
+sanitizer_log_file="${cholla_root}/bin/compute-sanitizer-${tool}.log"
+
+# Echo Paths
+echo -e "cholla_root           = ${cholla_root}"
+echo -e "cholla_exe            = ${cholla_exe}"
+echo -e "cholla_parameter_file = ${cholla_parameter_file}"
+echo -e "COMPUTE_SANITIZER     = ${COMPUTE_SANITIZER}"
+echo -e "sanitizer_log_file    = ${sanitizer_log_file}"
+echo -e ""
+echo -e "tool      = ${tool}"
+echo -e "tool_args = ${tool_args}"
+
+# Execute Sanitizer
+COMMAND="${COMPUTE_SANITIZER} --log-file ${sanitizer_log_file} --tool ${tool} ${tool_args} ${cholla_exe} ${cholla_parameter_file}"
+echo -e "Launch Command = ${COMMAND}"
+$COMMAND
\ No newline at end of file
diff --git a/tools/clang-format_runner.sh b/tools/clang-format_runner.sh
new file mode 100755
index 000000000..ece80ec67
--- /dev/null
+++ b/tools/clang-format_runner.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+
+# Description:
+# Run clang-format on all the source files in Cholla. Any command line arguments
+# provided to this script are passed directly to clang-format
+#
+# Dependencies:
+# - clang-format v15 or greater
+# - GNU Find, the default macos version won't work
+
+# Get the location of Cholla
+cholla_root="$(dirname "$(dirname "$(readlink -fm "$0")")")"
+cd $cholla_root
+
+# Get a list of all the files to format
+readarray -t files <<<$(find ${cholla_root} -regex '.*\.\(h\|hpp\|c\|cpp\|cu\|cuh\)$' -print)
+
+clang-format -i --verbose "$@" -style="file" "${files[@]}"
\ No newline at end of file
diff --git a/tools/clang-tidy_runner.sh b/tools/clang-tidy_runner.sh
new file mode 100755
index 000000000..6f2915b8f
--- /dev/null
+++ b/tools/clang-tidy_runner.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+
+# Description:
+# Run clang-tidy on all build types in parallel. Note that this spawns 2x the
+# number of build types threads since each type has a thread for the CPU code
+# and a thread for the GPU code
+
+# If ctrl-c is sent trap it and kill all clang-tidy processes
+trap "kill -- -$$" EXIT
+
+# cd into the Cholla directory. Default to ${HOME}/Code/cholla
+cholla_root="$(dirname "$(dirname "$(readlink -fm "$0")")")"
+cd $cholla_root
+
+# Run all clang-tidy build types in parallel
+builds=( hydro gravity disk particles cosmology mhd dust cooling)
+for build in "${builds[@]}"
+do
+  make tidy TYPE=$build &
+done
+
+# Wait for clang-tidy to finish
+wait